Actual source code: sf.c
1: #include <petsc/private/sfimpl.h>
2: #include <petsc/private/hashseti.h>
3: #include <petsc/private/viewerimpl.h>
4: #include <petscctable.h>
6: #if defined(PETSC_HAVE_CUDA)
7: #include <cuda_runtime.h>
8: #endif
10: #if defined(PETSC_HAVE_HIP)
11: #include <hip/hip_runtime.h>
12: #endif
14: #if defined(PETSC_CLANG_STATIC_ANALYZER)
15: void PetscSFCheckGraphSet(PetscSF,int);
16: #else
17: #if defined(PETSC_USE_DEBUG)
18: # define PetscSFCheckGraphSet(sf,arg) do { \
19: if (PetscUnlikely(!(sf)->graphset)) \
20: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
21: } while (0)
22: #else
23: # define PetscSFCheckGraphSet(sf,arg) do {} while (0)
24: #endif
25: #endif
27: const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
29: /*@
30: PetscSFCreate - create a star forest communication context
32: Collective
34: Input Parameter:
35: . comm - communicator on which the star forest will operate
37: Output Parameter:
38: . sf - new star forest context
40: Options Database Keys:
41: + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default)
42: . -sf_type window -Use MPI-3 one-sided window for communication
43: - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication
45: Level: intermediate
47: Notes:
48: When one knows the communication graph is one of the predefined graphs, such as MPI_Alltoall, MPI_Allgatherv, or
49: MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
50: SFs are optimized and have better performance than general SFs.
52: .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
53: @*/
54: PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
55: {
56: PetscSF b;
59: PetscSFInitializePackage();
61: PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);
63: b->nroots = -1;
64: b->nleaves = -1;
65: b->minleaf = PETSC_MAX_INT;
66: b->maxleaf = PETSC_MIN_INT;
67: b->nranks = -1;
68: b->rankorder = PETSC_TRUE;
69: b->ingroup = MPI_GROUP_NULL;
70: b->outgroup = MPI_GROUP_NULL;
71: b->graphset = PETSC_FALSE;
72: #if defined(PETSC_HAVE_DEVICE)
73: b->use_gpu_aware_mpi = use_gpu_aware_mpi;
74: b->use_stream_aware_mpi = PETSC_FALSE;
75: b->unknown_input_stream= PETSC_FALSE;
76: #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
77: b->backend = PETSCSF_BACKEND_KOKKOS;
78: #elif defined(PETSC_HAVE_CUDA)
79: b->backend = PETSCSF_BACKEND_CUDA;
80: #elif defined(PETSC_HAVE_HIP)
81: b->backend = PETSCSF_BACKEND_HIP;
82: #endif
84: #if defined(PETSC_HAVE_NVSHMEM)
85: b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */
86: b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
87: PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL);
88: PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL);
89: #endif
90: #endif
91: b->vscat.from_n = -1;
92: b->vscat.to_n = -1;
93: b->vscat.unit = MPIU_SCALAR;
94: *sf = b;
95: return 0;
96: }
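/* Editorial usage sketch (not part of sf.c, hedged): the minimal create/configure/destroy cycle
   for a PetscSF, as suggested by the PetscSFCreate() notes above. All calls are existing PetscSF
   API; error checking is elided to match this listing.

     PetscSF sf;
     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     PetscSFSetType(sf,PETSCSFBASIC);        // or leave it to -sf_type via PetscSFSetFromOptions()
     PetscSFSetFromOptions(sf);
     // ... PetscSFSetGraph() or PetscSFSetGraphWithPattern(), then PetscSFSetUp() ...
     PetscSFDestroy(&sf);
*/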
98: /*@
99: PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
101: Collective
103: Input Parameter:
104: . sf - star forest
106: Level: advanced
108: .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
109: @*/
110: PetscErrorCode PetscSFReset(PetscSF sf)
111: {
113: if (sf->ops->Reset) (*sf->ops->Reset)(sf);
114: sf->nroots = -1;
115: sf->nleaves = -1;
116: sf->minleaf = PETSC_MAX_INT;
117: sf->maxleaf = PETSC_MIN_INT;
118: sf->mine = NULL;
119: sf->remote = NULL;
120: sf->graphset = PETSC_FALSE;
121: PetscFree(sf->mine_alloc);
122: PetscFree(sf->remote_alloc);
123: sf->nranks = -1;
124: PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);
125: sf->degreeknown = PETSC_FALSE;
126: PetscFree(sf->degree);
127: if (sf->ingroup != MPI_GROUP_NULL) MPI_Group_free(&sf->ingroup);
128: if (sf->outgroup != MPI_GROUP_NULL) MPI_Group_free(&sf->outgroup);
129: if (sf->multi) sf->multi->multi = NULL;
130: PetscSFDestroy(&sf->multi);
131: PetscLayoutDestroy(&sf->map);
133: #if defined(PETSC_HAVE_DEVICE)
134: for (PetscInt i=0; i<2; i++) PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);
135: #endif
137: sf->setupcalled = PETSC_FALSE;
138: return 0;
139: }
141: /*@C
142: PetscSFSetType - Set the PetscSF communication implementation
144: Collective on PetscSF
146: Input Parameters:
147: + sf - the PetscSF context
148: - type - a known method
150: Options Database Key:
151: . -sf_type <type> - Sets the method; use -help for a list
152: of available methods (for instance, window, basic, neighbor)
154: Notes:
155: See "include/petscsf.h" for available methods (for instance)
156: + PETSCSFWINDOW - MPI-2/3 one-sided
157: - PETSCSFBASIC - basic implementation using MPI-1 two-sided
159: Level: intermediate
161: .seealso: PetscSFType, PetscSFCreate()
162: @*/
163: PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
164: {
165: PetscBool match;
166: PetscErrorCode (*r)(PetscSF);
171: PetscObjectTypeCompare((PetscObject)sf,type,&match);
172: if (match) return 0;
174: PetscFunctionListFind(PetscSFList,type,&r);
176: /* Destroy the previous PetscSF implementation context */
177: if (sf->ops->Destroy) (*(sf)->ops->Destroy)(sf);
178: PetscMemzero(sf->ops,sizeof(*sf->ops));
179: PetscObjectChangeTypeName((PetscObject)sf,type);
180: (*r)(sf);
181: return 0;
182: }
184: /*@C
185: PetscSFGetType - Get the PetscSF communication implementation
187: Not Collective
189: Input Parameter:
190: . sf - the PetscSF context
192: Output Parameter:
193: . type - the PetscSF type name
195: Level: intermediate
197: .seealso: PetscSFSetType(), PetscSFCreate()
198: @*/
199: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
200: {
203: *type = ((PetscObject)sf)->type_name;
204: return 0;
205: }
207: /*@C
208: PetscSFDestroy - destroy star forest
210: Collective
212: Input Parameter:
213: . sf - address of star forest
215: Level: intermediate
217: .seealso: PetscSFCreate(), PetscSFReset()
218: @*/
219: PetscErrorCode PetscSFDestroy(PetscSF *sf)
220: {
221: if (!*sf) return 0;
223: if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; return 0;}
224: PetscSFReset(*sf);
225: if ((*sf)->ops->Destroy) (*(*sf)->ops->Destroy)(*sf);
226: PetscSFDestroy(&(*sf)->vscat.lsf);
227: if ((*sf)->vscat.bs > 1) MPI_Type_free(&(*sf)->vscat.unit);
228: PetscHeaderDestroy(sf);
229: return 0;
230: }
232: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
233: {
234: PetscInt i, nleaves;
235: PetscMPIInt size;
236: const PetscInt *ilocal;
237: const PetscSFNode *iremote;
239: if (!sf->graphset || !PetscDefined(USE_DEBUG)) return 0;
240: PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);
241: MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
242: for (i = 0; i < nleaves; i++) {
243: const PetscInt rank = iremote[i].rank;
244: const PetscInt remote = iremote[i].index;
245: const PetscInt leaf = ilocal ? ilocal[i] : i;
249: }
250: return 0;
251: }
253: /*@
254: PetscSFSetUp - set up communication structures
256: Collective
258: Input Parameter:
259: . sf - star forest communication object
261: Level: beginner
263: .seealso: PetscSFSetFromOptions(), PetscSFSetType()
264: @*/
265: PetscErrorCode PetscSFSetUp(PetscSF sf)
266: {
268: PetscSFCheckGraphSet(sf,1);
269: if (sf->setupcalled) return 0;
270: PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);
271: PetscSFCheckGraphValid_Private(sf);
272: if (!((PetscObject)sf)->type_name) PetscSFSetType(sf,PETSCSFBASIC); /* Zero all sf->ops */
273: if (sf->ops->SetUp) (*sf->ops->SetUp)(sf);
274: #if defined(PETSC_HAVE_CUDA)
275: if (sf->backend == PETSCSF_BACKEND_CUDA) {
276: sf->ops->Malloc = PetscSFMalloc_CUDA;
277: sf->ops->Free = PetscSFFree_CUDA;
278: }
279: #endif
280: #if defined(PETSC_HAVE_HIP)
281: if (sf->backend == PETSCSF_BACKEND_HIP) {
282: sf->ops->Malloc = PetscSFMalloc_HIP;
283: sf->ops->Free = PetscSFFree_HIP;
284: }
285: #endif
288: #if defined(PETSC_HAVE_KOKKOS)
289: if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
290: sf->ops->Malloc = PetscSFMalloc_Kokkos;
291: sf->ops->Free = PetscSFFree_Kokkos;
292: }
293: #endif
294: PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);
295: sf->setupcalled = PETSC_TRUE;
296: return 0;
297: }
299: /*@
300: PetscSFSetFromOptions - set PetscSF options using the options database
302: Logically Collective
304: Input Parameter:
305: . sf - star forest
307: Options Database Keys:
308: + -sf_type - implementation type, see PetscSFSetType()
309: . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
310: . -sf_unknown_input_stream - Assume the input root/leafdata passed to SF is computed on arbitrary device streams unknown to SF (default: false).
313: . -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
314:                              If true, this option only works with -use_gpu_aware_mpi 1.
316: - -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
317:                                    On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
318:                                    the only available backend is kokkos.
320: Level: intermediate
321: @*/
322: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
323: {
324: PetscSFType deft;
325: char type[256];
327: PetscBool flg;
330: PetscObjectOptionsBegin((PetscObject)sf);
331: deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
332: PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);
333: PetscSFSetType(sf,flg ? type : deft);
334: PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);
335: #if defined(PETSC_HAVE_DEVICE)
336: {
337: char backendstr[32] = {0};
338: PetscBool isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
339: /* Change the defaults set in PetscSFCreate() with command line options */
340: PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL);
341: PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);
342: PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);
343: PetscStrcasecmp("cuda",backendstr,&isCuda);
344: PetscStrcasecmp("kokkos",backendstr,&isKokkos);
345: PetscStrcasecmp("hip",backendstr,&isHip);
346: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
347: if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
348: else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
349: else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
351: #elif defined(PETSC_HAVE_KOKKOS)
353: #endif
354: }
355: #endif
356: if (sf->ops->SetFromOptions) (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);
357: PetscOptionsEnd();
358: return 0;
359: }
361: /*@
362: PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
364: Logically Collective
366: Input Parameters:
367: + sf - star forest
368: - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
370: Level: advanced
372: .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
373: @*/
374: PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
375: {
379: sf->rankorder = flg;
380: return 0;
381: }
383: /*@C
384: PetscSFSetGraph - Set a parallel star forest
386: Collective
388: Input Parameters:
389: + sf - star forest
390: . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
391: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
392: . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
393: during setup in debug mode)
394: . localmode - copy mode for ilocal
395: . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
396: during setup in debug mode)
397: - remotemode - copy mode for iremote
399: Level: intermediate
401: Notes:
402: Leaf indices in ilocal must be unique, otherwise an error occurs.
404: Input arrays ilocal and iremote follow the PetscCopyMode semantics.
405: In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER,
406: PETSc might modify the respective array;
407: if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy().
408: Only if PETSC_COPY_VALUES is used is the respective array guaranteed to stay intact, so that a const array can be passed (though a cast to non-const is needed).
410: Fortran Notes:
411: In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.
413: Developer Notes:
414: We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
415: This also makes it easy to compare the leaf sets of two SFs.
417: .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
418: @*/
419: PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,PetscInt *ilocal,PetscCopyMode localmode,PetscSFNode *iremote,PetscCopyMode remotemode)
420: {
421: PetscBool unique, contiguous;
431: if (sf->nroots >= 0) { /* Reset only if graph already set */
432: PetscSFReset(sf);
433: }
435: PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);
437: sf->nroots = nroots;
438: sf->nleaves = nleaves;
440: if (localmode == PETSC_COPY_VALUES && ilocal) {
441: PetscInt *tlocal = NULL;
443: PetscMalloc1(nleaves,&tlocal);
444: PetscArraycpy(tlocal,ilocal,nleaves);
445: ilocal = tlocal;
446: }
447: if (remotemode == PETSC_COPY_VALUES) {
448: PetscSFNode *tremote = NULL;
450: PetscMalloc1(nleaves,&tremote);
451: PetscArraycpy(tremote,iremote,nleaves);
452: iremote = tremote;
453: }
455: if (nleaves && ilocal) {
456: PetscSFNode work;
458: PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work);
459: PetscSortedCheckDupsInt(nleaves, ilocal, &unique);
460: unique = PetscNot(unique);
462: sf->minleaf = ilocal[0];
463: sf->maxleaf = ilocal[nleaves-1];
464: contiguous = (PetscBool) (unique && ilocal[0] == 0 && ilocal[nleaves-1] == nleaves-1);
465: } else {
466: sf->minleaf = 0;
467: sf->maxleaf = nleaves - 1;
468: unique = PETSC_TRUE;
469: contiguous = PETSC_TRUE;
470: }
472: if (contiguous) {
473: if (localmode == PETSC_USE_POINTER) {
474: ilocal = NULL;
475: } else {
476: PetscFree(ilocal);
477: }
478: }
479: sf->mine = ilocal;
480: if (localmode == PETSC_USE_POINTER) {
481: sf->mine_alloc = NULL;
482: } else {
483: sf->mine_alloc = ilocal;
484: }
485: sf->remote = iremote;
486: if (remotemode == PETSC_USE_POINTER) {
487: sf->remote_alloc = NULL;
488: } else {
489: sf->remote_alloc = iremote;
490: }
491: PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);
492: sf->graphset = PETSC_TRUE;
493: return 0;
494: }
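/* Editorial usage sketch (not part of sf.c, hedged): building a simple graph with PetscSFSetGraph(),
   following the copy-mode notes above. Here every rank creates nl leaves that reference the first nl
   roots on rank 0; the arrays are heap-allocated and handed to the SF with PETSC_OWN_POINTER, so the
   caller must not free them afterwards. Error checking is elided to match this listing.

     PetscSF     sf;
     PetscInt    i,nl = 2,nroots;
     PetscInt    *ilocal;
     PetscSFNode *iremote;
     PetscMPIInt rank;

     MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
     nroots = rank ? 0 : nl;                  // only rank 0 owns roots in this toy graph
     PetscMalloc1(nl,&ilocal);
     PetscMalloc1(nl,&iremote);
     for (i=0; i<nl; i++) {
       ilocal[i]        = i;                  // leaf location in the local leafdata buffer
       iremote[i].rank  = 0;                  // root lives on rank 0
       iremote[i].index = i;                  // root location in rank 0's rootdata buffer
     }
     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     PetscSFSetGraph(sf,nroots,nl,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);
     PetscSFSetUp(sf);
     // ... communicate ...
     PetscSFDestroy(&sf);
*/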
496: /*@
497: PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern
499: Collective
501: Input Parameters:
502: + sf - The PetscSF
503: . map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
504: - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL
506: Notes:
507: It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
508: n and N are local and global sizes of x respectively.
510: With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph such that a Bcast on it copies x to
511: sequential vectors y on all ranks.
513: With PETSCSF_PATTERN_GATHER, the routine creates a graph such that a Bcast on it copies x to a
514: sequential vector y on rank 0.
516: In the above cases, entries of x are roots and entries of y are leaves.
518: With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is the size of sf's communicator. The routine
519: creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
520: of rank j, with 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that this does
521: not mean one cannot send multiple items. One just needs to create a new MPI datatype for the multiple data
522: items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.
524: In this case, roots and leaves are symmetric.
526: Level: intermediate
527: @*/
528: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
529: {
530: MPI_Comm comm;
531: PetscInt n,N,res[2];
532: PetscMPIInt rank,size;
533: PetscSFType type;
537: PetscObjectGetComm((PetscObject)sf, &comm);
539: MPI_Comm_rank(comm,&rank);
540: MPI_Comm_size(comm,&size);
542: if (pattern == PETSCSF_PATTERN_ALLTOALL) {
543: type = PETSCSFALLTOALL;
544: PetscLayoutCreate(comm,&sf->map);
545: PetscLayoutSetLocalSize(sf->map,size);
546: PetscLayoutSetSize(sf->map,((PetscInt)size)*size);
547: PetscLayoutSetUp(sf->map);
548: } else {
549: PetscLayoutGetLocalSize(map,&n);
550: PetscLayoutGetSize(map,&N);
551: res[0] = n;
552: res[1] = -n;
553: /* Check if n is the same over all ranks so that we can optimize it */
554: MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);
555: if (res[0] == -res[1]) { /* same n */
556: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
557: } else {
558: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
559: }
560: PetscLayoutReference(map,&sf->map);
561: }
562: PetscSFSetType(sf,type);
564: sf->pattern = pattern;
565: sf->mine = NULL; /* Contiguous */
567: /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
568: Also set other easy stuff.
569: */
570: if (pattern == PETSCSF_PATTERN_ALLGATHER) {
571: sf->nleaves = N;
572: sf->nroots = n;
573: sf->nranks = size;
574: sf->minleaf = 0;
575: sf->maxleaf = N - 1;
576: } else if (pattern == PETSCSF_PATTERN_GATHER) {
577: sf->nleaves = rank ? 0 : N;
578: sf->nroots = n;
579: sf->nranks = rank ? 0 : size;
580: sf->minleaf = 0;
581: sf->maxleaf = rank ? -1 : N - 1;
582: } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
583: sf->nleaves = size;
584: sf->nroots = size;
585: sf->nranks = size;
586: sf->minleaf = 0;
587: sf->maxleaf = size - 1;
588: }
589: sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
590: sf->graphset = PETSC_TRUE;
591: return 0;
592: }
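/* Editorial usage sketch (not part of sf.c, hedged): an ALLGATHER-patterned SF as described in the notes
   above. Each rank contributes n roots; a Bcast then behaves like MPI_Allgatherv, copying all roots to
   every rank's leaf buffer. Error checking is elided to match this listing.

     PetscSF     sf;
     PetscLayout map;
     PetscInt    n = 2;                 // local number of roots (entries of x)

     PetscLayoutCreate(PETSC_COMM_WORLD,&map);
     PetscLayoutSetLocalSize(map,n);
     PetscLayoutSetUp(map);
     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     PetscSFSetGraphWithPattern(sf,map,PETSCSF_PATTERN_ALLGATHER);
     // rootdata has n entries; leafdata has N (= global size) entries on every rank:
     // PetscSFBcastBegin/End(sf,MPIU_SCALAR,rootdata,leafdata,MPI_REPLACE);
     PetscLayoutDestroy(&map);
     PetscSFDestroy(&sf);
*/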
594: /*@
595: PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
597: Collective
599: Input Parameter:
600: . sf - star forest to invert
602: Output Parameter:
603: . isf - inverse of sf
605: Level: advanced
607: Notes:
608: All roots must have degree 1.
610: The local space may be a permutation, but cannot be sparse.
612: .seealso: PetscSFSetGraph()
613: @*/
614: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
615: {
616: PetscMPIInt rank;
617: PetscInt i,nroots,nleaves,maxlocal,count,*newilocal;
618: const PetscInt *ilocal;
619: PetscSFNode *roots,*leaves;
622: PetscSFCheckGraphSet(sf,1);
625: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);
626: maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
628: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
629: PetscMalloc2(nroots,&roots,maxlocal,&leaves);
630: for (i=0; i<maxlocal; i++) {
631: leaves[i].rank = rank;
632: leaves[i].index = i;
633: }
634: for (i=0; i <nroots; i++) {
635: roots[i].rank = -1;
636: roots[i].index = -1;
637: }
638: PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);
639: PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);
641: /* Check whether our leaves are sparse */
642: for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
643: if (count == nroots) newilocal = NULL;
644: else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
645: PetscMalloc1(count,&newilocal);
646: for (i=0,count=0; i<nroots; i++) {
647: if (roots[i].rank >= 0) {
648: newilocal[count] = i;
649: roots[count].rank = roots[i].rank;
650: roots[count].index = roots[i].index;
651: count++;
652: }
653: }
654: }
656: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);
657: PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);
658: PetscFree2(roots,leaves);
659: return 0;
660: }
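/* Editorial usage sketch (not part of sf.c, hedged): inverting an SF in which every root has degree 1,
   e.g. a permutation-like map. Data that a Reduce moves leaf->root on sf is moved by a Bcast on isf,
   and vice versa. Error checking is elided to match this listing.

     PetscSF isf;
     PetscSFCreateInverseSF(sf,&isf);   // sf must have its graph set and all roots of degree 1
     PetscSFDestroy(&isf);
*/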
662: /*@
663: PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
665: Collective
667: Input Parameters:
668: + sf - communication object to duplicate
669: - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
671: Output Parameter:
672: . newsf - new communication object
674: Level: beginner
676: .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
677: @*/
678: PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
679: {
680: PetscSFType type;
681: MPI_Datatype dtype=MPIU_SCALAR;
686: PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);
687: PetscSFGetType(sf,&type);
688: if (type) PetscSFSetType(*newsf,type);
689: (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
690: if (opt == PETSCSF_DUPLICATE_GRAPH) {
691: PetscSFCheckGraphSet(sf,1);
692: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
693: PetscInt nroots,nleaves;
694: const PetscInt *ilocal;
695: const PetscSFNode *iremote;
696: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
697: PetscSFSetGraph(*newsf,nroots,nleaves,(PetscInt*)ilocal,PETSC_COPY_VALUES,(PetscSFNode*)iremote,PETSC_COPY_VALUES);
698: } else {
699: PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);
700: }
701: }
702: /* Since oldtype is committed, so is newtype, according to MPI */
703: if (sf->vscat.bs > 1) MPI_Type_dup(sf->vscat.unit,&dtype);
704: (*newsf)->vscat.bs = sf->vscat.bs;
705: (*newsf)->vscat.unit = dtype;
706: (*newsf)->vscat.to_n = sf->vscat.to_n;
707: (*newsf)->vscat.from_n = sf->vscat.from_n;
708: /* Do not copy lsf. Build it on demand since it is rarely used */
710: #if defined(PETSC_HAVE_DEVICE)
711: (*newsf)->backend = sf->backend;
712: (*newsf)->unknown_input_stream= sf->unknown_input_stream;
713: (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi;
714: (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
715: #endif
716: if (sf->ops->Duplicate) (*sf->ops->Duplicate)(sf,opt,*newsf);
717: /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
718: return 0;
719: }
721: /*@C
722: PetscSFGetGraph - Get the graph specifying a parallel star forest
724: Not Collective
726: Input Parameter:
727: . sf - star forest
729: Output Parameters:
730: + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
731: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
732: . ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
733: - iremote - remote locations of root vertices for each leaf on the current process
735: Notes:
736: We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
738: The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(),
739: see its manpage for details.
741: Fortran Notes:
742: The returned iremote array is a copy and must be deallocated after use. Consequently, if you
743: want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.
745: To check for a NULL ilocal use
746: $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
748: Level: intermediate
750: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
751: @*/
752: PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
753: {
755: if (sf->ops->GetGraph) {
756: (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);
757: } else {
758: if (nroots) *nroots = sf->nroots;
759: if (nleaves) *nleaves = sf->nleaves;
760: if (ilocal) *ilocal = sf->mine;
761: if (iremote) *iremote = sf->remote;
762: }
763: return 0;
764: }
766: /*@
767: PetscSFGetLeafRange - Get the active leaf ranges
769: Not Collective
771: Input Parameter:
772: . sf - star forest
774: Output Parameters:
775: + minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
776: - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
778: Level: developer
780: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
781: @*/
782: PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
783: {
785: PetscSFCheckGraphSet(sf,1);
786: if (minleaf) *minleaf = sf->minleaf;
787: if (maxleaf) *maxleaf = sf->maxleaf;
788: return 0;
789: }
791: /*@C
792: PetscSFViewFromOptions - View a star forest based on the options database
794: Collective on PetscSF
796: Input Parameters:
797: + A - the star forest
798: . obj - Optional object
799: - name - command line option
801: Level: intermediate
802: .seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
803: @*/
804: PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
805: {
807: PetscObjectViewFromOptions((PetscObject)A,obj,name);
808: return 0;
809: }
811: /*@C
812: PetscSFView - view a star forest
814: Collective
816: Input Parameters:
817: + sf - star forest
818: - viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
820: Level: beginner
822: .seealso: PetscSFCreate(), PetscSFSetGraph()
823: @*/
824: PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
825: {
826: PetscBool iascii;
827: PetscViewerFormat format;
830: if (!viewer) PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);
833: if (sf->graphset) PetscSFSetUp(sf);
834: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
835: if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
836: PetscMPIInt rank;
837: PetscInt ii,i,j;
839: PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);
840: PetscViewerASCIIPushTab(viewer);
841: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
842: if (!sf->graphset) {
843: PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");
844: PetscViewerASCIIPopTab(viewer);
845: return 0;
846: }
847: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
848: PetscViewerASCIIPushSynchronized(viewer);
849: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks);
850: for (i=0; i<sf->nleaves; i++) {
851: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);
852: }
853: PetscViewerFlush(viewer);
854: PetscViewerGetFormat(viewer,&format);
855: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
856: PetscMPIInt *tmpranks,*perm;
857: PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);
858: PetscArraycpy(tmpranks,sf->ranks,sf->nranks);
859: for (i=0; i<sf->nranks; i++) perm[i] = i;
860: PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);
861: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);
862: for (ii=0; ii<sf->nranks; ii++) {
863: i = perm[ii];
864: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);
865: for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
866: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j]);
867: }
868: }
869: PetscFree2(tmpranks,perm);
870: }
871: PetscViewerFlush(viewer);
872: PetscViewerASCIIPopSynchronized(viewer);
873: }
874: PetscViewerASCIIPopTab(viewer);
875: }
876: if (sf->ops->View) (*sf->ops->View)(sf,viewer);
877: return 0;
878: }
880: /*@C
881: PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
883: Not Collective
885: Input Parameter:
886: . sf - star forest
888: Output Parameters:
889: + nranks - number of ranks referenced by local part
890: . ranks - array of ranks
891: . roffset - offset in rmine/rremote for each rank (length nranks+1)
892: . rmine - concatenated array holding local indices referencing each remote rank
893: - rremote - concatenated array holding remote indices referenced for each remote rank
895: Level: developer
897: .seealso: PetscSFGetLeafRanks()
898: @*/
899: PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
900: {
903: if (sf->ops->GetRootRanks) {
904: (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);
905: } else {
906: /* The generic implementation */
907: if (nranks) *nranks = sf->nranks;
908: if (ranks) *ranks = sf->ranks;
909: if (roffset) *roffset = sf->roffset;
910: if (rmine) *rmine = sf->rmine;
911: if (rremote) *rremote = sf->rremote;
912: }
913: return 0;
914: }
916: /*@C
917: PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
919: Not Collective
921: Input Parameter:
922: . sf - star forest
924: Output Parameters:
925: + niranks - number of leaf ranks referencing roots on this process
926: . iranks - array of ranks
927: . ioffset - offset in irootloc for each rank (length niranks+1)
928: - irootloc - concatenated array holding local indices of roots referenced by each leaf rank
930: Level: developer
932: .seealso: PetscSFGetRootRanks()
933: @*/
934: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
935: {
938: if (sf->ops->GetLeafRanks) {
939: (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);
940: } else {
941: PetscSFType type;
942: PetscSFGetType(sf,&type);
943: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
944: }
945: return 0;
946: }
948: static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
949: PetscInt i;
950: for (i=0; i<n; i++) {
951: if (needle == list[i]) return PETSC_TRUE;
952: }
953: return PETSC_FALSE;
954: }
956: /*@C
957: PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
959: Collective
961: Input Parameters:
962: + sf - PetscSF to set up; PetscSFSetGraph() must have been called
963: - dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)
965: Level: developer
967: .seealso: PetscSFGetRootRanks()
968: @*/
969: PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
970: {
971: PetscTable table;
972: PetscTablePosition pos;
973: PetscMPIInt size,groupsize,*groupranks;
974: PetscInt *rcount,*ranks;
975: PetscInt i, irank = -1,orank = -1;
978: PetscSFCheckGraphSet(sf,1);
979: MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
980: PetscTableCreate(10,size,&table);
981: for (i=0; i<sf->nleaves; i++) {
982: /* Log 1-based rank */
983: PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);
984: }
985: PetscTableGetCount(table,&sf->nranks);
986: PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);
987: PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);
988: PetscTableGetHeadPosition(table,&pos);
989: for (i=0; i<sf->nranks; i++) {
990: PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);
991: ranks[i]--; /* Convert back to 0-based */
992: }
993: PetscTableDestroy(&table);
995: /* We expect that dgroup is reliably "small" while nranks could be large */
996: {
997: MPI_Group group = MPI_GROUP_NULL;
998: PetscMPIInt *dgroupranks;
999: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1000: MPI_Group_size(dgroup,&groupsize);
1001: PetscMalloc1(groupsize,&dgroupranks);
1002: PetscMalloc1(groupsize,&groupranks);
1003: for (i=0; i<groupsize; i++) dgroupranks[i] = i;
1004: if (groupsize) MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);
1005: MPI_Group_free(&group);
1006: PetscFree(dgroupranks);
1007: }
1009: /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1010: for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
1011: for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
1012: if (InList(ranks[i],groupsize,groupranks)) break;
1013: }
1014: for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1015: if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
1016: }
1017: if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1018: PetscInt tmprank,tmpcount;
1020: tmprank = ranks[i];
1021: tmpcount = rcount[i];
1022: ranks[i] = ranks[sf->ndranks];
1023: rcount[i] = rcount[sf->ndranks];
1024: ranks[sf->ndranks] = tmprank;
1025: rcount[sf->ndranks] = tmpcount;
1026: sf->ndranks++;
1027: }
1028: }
1029: PetscFree(groupranks);
1030: PetscSortIntWithArray(sf->ndranks,ranks,rcount);
1031: PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);
1032: sf->roffset[0] = 0;
1033: for (i=0; i<sf->nranks; i++) {
1034: PetscMPIIntCast(ranks[i],sf->ranks+i);
1035: sf->roffset[i+1] = sf->roffset[i] + rcount[i];
1036: rcount[i] = 0;
1037: }
1038: for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
1039: /* short circuit */
1040: if (orank != sf->remote[i].rank) {
1041: /* Search for index of iremote[i].rank in sf->ranks */
1042: PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);
1043: if (irank < 0) {
1044: PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);
1045: if (irank >= 0) irank += sf->ndranks;
1046: }
1047: orank = sf->remote[i].rank;
1048: }
1050: sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i;
1051: sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1052: rcount[irank]++;
1053: }
1054: PetscFree2(rcount,ranks);
1055: return 0;
1056: }
1058: /*@C
1059: PetscSFGetGroups - gets incoming and outgoing process groups
1061: Collective
1063: Input Parameter:
1064: . sf - star forest
1066: Output Parameters:
1067: + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1068: - outgoing - group of destination processes for outgoing edges (roots that I reference)
1070: Level: developer
1072: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
1073: @*/
1074: PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
1075: {
1076: MPI_Group group = MPI_GROUP_NULL;
1079: if (sf->ingroup == MPI_GROUP_NULL) {
1080: PetscInt i;
1081: const PetscInt *indegree;
1082: PetscMPIInt rank,*outranks,*inranks;
1083: PetscSFNode *remote;
1084: PetscSF bgcount;
1086: /* Compute the number of incoming ranks */
1087: PetscMalloc1(sf->nranks,&remote);
1088: for (i=0; i<sf->nranks; i++) {
1089: remote[i].rank = sf->ranks[i];
1090: remote[i].index = 0;
1091: }
1092: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);
1093: PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1094: PetscSFComputeDegreeBegin(bgcount,&indegree);
1095: PetscSFComputeDegreeEnd(bgcount,&indegree);
1096: /* Enumerate the incoming ranks */
1097: PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);
1098: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1099: for (i=0; i<sf->nranks; i++) outranks[i] = rank;
1100: PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);
1101: PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);
1102: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1103: MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);
1104: MPI_Group_free(&group);
1105: PetscFree2(inranks,outranks);
1106: PetscSFDestroy(&bgcount);
1107: }
1108: *incoming = sf->ingroup;
1110: if (sf->outgroup == MPI_GROUP_NULL) {
1111: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1112: MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);
1113: MPI_Group_free(&group);
1114: }
1115: *outgoing = sf->outgroup;
1116: return 0;
1117: }
1119: /*@
1120: PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
1122: Collective
1124: Input Parameter:
1125: . sf - star forest that may contain roots with 0 or with more than 1 vertex
1127: Output Parameter:
1128: . multi - star forest with split roots, such that each root has degree exactly 1
1130: Level: developer
1132: Notes:
1134: In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1135: directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1136: edge, it is a candidate for future optimization that might involve its removal.
1138: .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1139: @*/
1140: PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
1141: {
1144: if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1145: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1146: *multi = sf->multi;
1147: sf->multi->multi = sf->multi;
1148: return 0;
1149: }
1150: if (!sf->multi) {
1151: const PetscInt *indegree;
1152: PetscInt i,*inoffset,*outones,*outoffset,maxlocal;
1153: PetscSFNode *remote;
1154: maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
1155: PetscSFComputeDegreeBegin(sf,&indegree);
1156: PetscSFComputeDegreeEnd(sf,&indegree);
1157: PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);
1158: inoffset[0] = 0;
1159: for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
1160: for (i=0; i<maxlocal; i++) outones[i] = 1;
1161: PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1162: PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1163: for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1164: if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
1165: for (i=0; i<sf->nroots; i++) {
1167: }
1168: }
1169: PetscMalloc1(sf->nleaves,&remote);
1170: for (i=0; i<sf->nleaves; i++) {
1171: remote[i].rank = sf->remote[i].rank;
1172: remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1173: }
1174: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1175: sf->multi->multi = sf->multi;
1176: PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1177: if (sf->rankorder) { /* Sort the ranks */
1178: PetscMPIInt rank;
1179: PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
1180: PetscSFNode *newremote;
1181: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1182: for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
1183: PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);
1184: for (i=0; i<maxlocal; i++) outranks[i] = rank;
1185: PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);
1186: PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);
1187: /* Sort the incoming ranks at each vertex, build the inverse map */
1188: for (i=0; i<sf->nroots; i++) {
1189: PetscInt j;
1190: for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
1191: PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);
1192: for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1193: }
1194: PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);
1195: PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);
1196: PetscMalloc1(sf->nleaves,&newremote);
1197: for (i=0; i<sf->nleaves; i++) {
1198: newremote[i].rank = sf->remote[i].rank;
1199: newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1200: }
1201: PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);
1202: PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);
1203: }
1204: PetscFree3(inoffset,outones,outoffset);
1205: }
1206: *multi = sf->multi;
1207: return 0;
1208: }
1210: /*@C
1211: PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices
1213: Collective
1215: Input Parameters:
1216: + sf - original star forest
1217: . nselected - number of selected roots on this process
1218: - selected - indices of the selected roots on this process
1220: Output Parameter:
1221: . esf - new star forest
1223: Level: advanced
1225: Note:
1226: To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1227: be done by calling PetscSFGetGraph().
1229: .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1230: @*/
1231: PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1232: {
1233: PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1234: const PetscInt *ilocal;
1235: signed char *rootdata,*leafdata,*leafmem;
1236: const PetscSFNode *iremote;
1237: PetscSFNode *new_iremote;
1238: MPI_Comm comm;
1241: PetscSFCheckGraphSet(sf,1);
1245: PetscSFSetUp(sf);
1246: PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);
1247: PetscObjectGetComm((PetscObject)sf,&comm);
1248: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
1250: if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1251: PetscBool dups;
1254: for (i=0; i<nselected; i++)
1256: }
1258: if (sf->ops->CreateEmbeddedRootSF) {
1259: (*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf);
1260: } else {
1261: /* A generic version of creating embedded sf */
1262: PetscSFGetLeafRange(sf,&minleaf,&maxleaf);
1263: maxlocal = maxleaf - minleaf + 1;
1264: PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);
1265: leafdata = leafmem - minleaf;
1266: /* Tag selected roots and bcast to leaves */
1267: for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1268: PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);
1269: PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);
1271: /* Build esf with leaves that are still connected */
1272: esf_nleaves = 0;
1273: for (i=0; i<nleaves; i++) {
1274: j = ilocal ? ilocal[i] : i;
1275: /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1276: with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1277: */
1278: esf_nleaves += (leafdata[j] ? 1 : 0);
1279: }
1280: PetscMalloc1(esf_nleaves,&new_ilocal);
1281: PetscMalloc1(esf_nleaves,&new_iremote);
1282: for (i=n=0; i<nleaves; i++) {
1283: j = ilocal ? ilocal[i] : i;
1284: if (leafdata[j]) {
1285: new_ilocal[n] = j;
1286: new_iremote[n].rank = iremote[i].rank;
1287: new_iremote[n].index = iremote[i].index;
1288: ++n;
1289: }
1290: }
1291: PetscSFCreate(comm,esf);
1292: PetscSFSetFromOptions(*esf);
1293: PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1294: PetscFree2(rootdata,leafmem);
1295: }
1296: PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);
1297: return 0;
1298: }
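/* Editorial usage sketch (not part of sf.c, hedged): keeping only the edges attached to a subset of
   roots. Indices are not remapped, so the embedded SF keeps the same nroots; leaves of deselected
   roots are dropped. The index values below are hypothetical. Error checking is elided to match this
   listing.

     PetscSF  esf;
     PetscInt selected[] = {0,3,7};            // hypothetical local root indices to keep
     PetscSFCreateEmbeddedRootSF(sf,3,selected,&esf);
     // PetscSFGetGraph(esf,...) tells which leaves still participate
     PetscSFDestroy(&esf);
*/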
1300: /*@C
1301: PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1303: Collective
1305: Input Parameters:
1306: + sf - original star forest
1307: . nselected - number of selected leaves on this process
1308: - selected - indices of the selected leaves on this process
1310: Output Parameter:
1311: . newsf - new star forest
1313: Level: advanced
1315: .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph()
1316: @*/
1317: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1318: {
1319: const PetscSFNode *iremote;
1320: PetscSFNode *new_iremote;
1321: const PetscInt *ilocal;
1322: PetscInt i,nroots,*leaves,*new_ilocal;
1323: MPI_Comm comm;
1326: PetscSFCheckGraphSet(sf,1);
1330: /* Uniq selected[] and put results in leaves[] */
1331: PetscObjectGetComm((PetscObject)sf,&comm);
1332: PetscMalloc1(nselected,&leaves);
1333: PetscArraycpy(leaves,selected,nselected);
1334: PetscSortedRemoveDupsInt(&nselected,leaves);
1337: /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1338: if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1339: (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);
1340: } else {
1341: PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);
1342: PetscMalloc1(nselected,&new_ilocal);
1343: PetscMalloc1(nselected,&new_iremote);
1344: for (i=0; i<nselected; ++i) {
1345: const PetscInt l = leaves[i];
1346: new_ilocal[i] = ilocal ? ilocal[l] : l;
1347: new_iremote[i].rank = iremote[l].rank;
1348: new_iremote[i].index = iremote[l].index;
1349: }
1350: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);
1351: PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1352: }
1353: PetscFree(leaves);
1354: return 0;
1355: }
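/* Editorial usage sketch (not part of sf.c, hedged): the leaf-side analogue of PetscSFCreateEmbeddedRootSF().
   Only the selected leaves keep their edges; indices are not remapped. The index values below are
   hypothetical. Error checking is elided to match this listing.

     PetscSF  lsf;
     PetscInt keep[] = {1,2};                  // hypothetical leaf numbers (positions in the leaf list, not ilocal values)
     PetscSFCreateEmbeddedLeafSF(sf,2,keep,&lsf);
     PetscSFDestroy(&lsf);
*/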
1357: /*@C
1358: PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()
1360: Collective on PetscSF
1362: Input Parameters:
1363: + sf - star forest on which to communicate
1364: . unit - data type associated with each node
1365: . rootdata - buffer to broadcast
1366: - op - operation to use for reduction
1368: Output Parameter:
1369: . leafdata - buffer to be reduced with values from each leaf's respective root
1371: Level: intermediate
1373: Notes:
1374: When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1375: are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1376: use PetscSFBcastWithMemTypeBegin() instead.
1377: .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin()
1378: @*/
1379: PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1380: {
1381: PetscMemType rootmtype,leafmtype;
1384: PetscSFSetUp(sf);
1385: if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);
1386: PetscGetMemType(rootdata,&rootmtype);
1387: PetscGetMemType(leafdata,&leafmtype);
1388: (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1389: if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);
1390: return 0;
1391: }
1393: /*@C
1394: PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()
1396: Collective on PetscSF
1398: Input Parameters:
1399: + sf - star forest on which to communicate
1400: . unit - data type associated with each node
1401: . rootmtype - memory type of rootdata
1402: . rootdata - buffer to broadcast
1403: . leafmtype - memory type of leafdata
1404: - op - operation to use for reduction
1406: Output Parameter:
1407: . leafdata - buffer to be reduced with values from each leaf's respective root
1409: Level: intermediate
1411: .seealso: PetscSFBcastEnd(), PetscSFBcastBegin()
1412: @*/
1413: PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
1414: {
1416: PetscSFSetUp(sf);
1417: if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);
1418: (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1419: if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);
1420: return 0;
1421: }
1423: /*@C
1424: PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()
1426: Collective
1428: Input Parameters:
1429: + sf - star forest
1430: . unit - data type
1431: . rootdata - buffer to broadcast
1432: - op - operation to use for reduction
1434: Output Parameter:
1435: . leafdata - buffer to be reduced with values from each leaf's respective root
1437: Level: intermediate
1439: .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1440: @*/
1441: PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1442: {
1444: if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);
1445: (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op);
1446: if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);
1447: return 0;
1448: }
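/* Editorial usage sketch (not part of sf.c, hedged): the usual Bcast pairing. With op = MPI_REPLACE each
   leaf simply receives a copy of its root's value; other MPI_Op values combine the incoming root value
   with the existing leaf value. Error checking is elided to match this listing.

     PetscInt *rootdata,*leafdata;             // rootdata[nroots], leafdata[maxleaf+1], already allocated
     PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);
     // ... unrelated local work can overlap with communication here ...
     PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);
*/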
1450: /*@C
1451: PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1453: Collective
1455: Input Parameters:
1456: + sf - star forest
1457: . unit - data type
1458: . leafdata - values to reduce
1459: - op - reduction operation
1461: Output Parameter:
1462: . rootdata - result of reduction of values from all leaves of each root
1464: Level: intermediate
1466: Notes:
1467: When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1468: are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1469: use PetscSFReduceWithMemTypeBegin() instead.
1471: .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
1472: @*/
1473: PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1474: {
1475: PetscMemType rootmtype,leafmtype;
1478: PetscSFSetUp(sf);
1479: if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);
1480: PetscGetMemType(rootdata,&rootmtype);
1481: PetscGetMemType(leafdata,&leafmtype);
1482: (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1483: if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);
1484: return 0;
1485: }
1487: /*@C
1488: PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()
1490: Collective
1492: Input Parameters:
1493: + sf - star forest
1494: . unit - data type
1495: . leafmtype - memory type of leafdata
1496: . leafdata - values to reduce
1497: . rootmtype - memory type of rootdata
1498: - op - reduction operation
1500: Output Parameter:
1501: . rootdata - result of reduction of values from all leaves of each root
1503: Level: intermediate
1505: .seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
1506: @*/
1507: PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1508: {
1510: PetscSFSetUp(sf);
1511: if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);
1512: (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1513: if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);
1514: return 0;
1515: }
1517: /*@C
1518: PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1520: Collective
1522: Input Parameters:
1523: + sf - star forest
1524: . unit - data type
1525: . leafdata - values to reduce
1526: - op - reduction operation
1528: Output Parameter:
1529: . rootdata - result of reduction of values from all leaves of each root
1531: Level: intermediate
1533: .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1534: @*/
1535: PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1536: {
1538: if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);
1539: (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);
1540: if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);
1541: return 0;
1542: }
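/* Editorial usage sketch (not part of sf.c, hedged): reducing leaf contributions into roots, the dual of
   Bcast. With op = MPI_SUM each root accumulates the values of all leaves pointing at it; roots with no
   leaves keep their existing value. Error checking is elided to match this listing.

     PetscInt *leafdata,*rootdata;             // already allocated and, for rootdata, initialized
     PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);
     PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);
*/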
1544: /*@C
1545: PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1547: Collective
1549: Input Parameters:
1550: + sf - star forest
1551: . unit - data type
1552: . leafdata - leaf values to use in reduction
1553: - op - operation to use for reduction
1555: Output Parameters:
1556: + rootdata - root values to be updated, input state is seen by first process to perform an update
1557: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1559: Level: advanced
1561: Note:
1562: The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1563: might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1564: not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1565: integers.
1567: .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1568: @*/
1569: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1570: {
1571: PetscMemType rootmtype,leafmtype,leafupdatemtype;
1574: PetscSFSetUp(sf);
1575: PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1576: PetscGetMemType(rootdata,&rootmtype);
1577: PetscGetMemType(leafdata,&leafmtype);
1578: PetscGetMemType(leafupdate,&leafupdatemtype);
1580: (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);
1581: PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1582: return 0;
1583: }
1585: /*@C
1586: PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1588: Collective
1590: Input Parameters:
1591: + sf - star forest
1592: . unit - data type
1593: . rootmtype - memory type of rootdata
1594: . leafmtype - memory type of leafdata
1595: . leafdata - leaf values to use in reduction
1596: . leafupdatemtype - memory type of leafupdate
1597: - op - operation to use for reduction
1599: Output Parameters:
1600: + rootdata - root values to be updated, input state is seen by first process to perform an update
1601: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1603: Level: advanced
1605: Note: See PetscSFFetchAndOpBegin() for more details.
1607: .seealso: PetscSFFetchAndOpBegin(),PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1608: @*/
1609: PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscMemType leafupdatemtype,void *leafupdate,MPI_Op op)
1610: {
1612: PetscSFSetUp(sf);
1613: PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1615: (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);
1616: PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1617: return 0;
1618: }
1620: /*@C
1621: PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1623: Collective
1625: Input Parameters:
1626: + sf - star forest
1627: . unit - data type
1628: . leafdata - leaf values to use in reduction
1629: - op - operation to use for reduction
1631: Output Parameters:
1632: + rootdata - root values to be updated, input state is seen by first process to perform an update
1633: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1635: Level: advanced
1637: .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1638: @*/
1639: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1640: {
1642: PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1643: (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);
1644: PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1645: return 0;
1646: }
1648: /*@C
1649: PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()
1651: Collective
1653: Input Parameter:
1654: . sf - star forest
1656: Output Parameter:
1657: . degree - degree of each root vertex
1659: Level: advanced
1661: Notes:
1662: The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1664: .seealso: PetscSFGatherBegin()
1665: @*/
1666: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1667: {
1669: PetscSFCheckGraphSet(sf,1);
1671: if (!sf->degreeknown) {
1672: PetscInt i, nroots = sf->nroots, maxlocal;
1674: maxlocal = sf->maxleaf-sf->minleaf+1;
1675: PetscMalloc1(nroots,&sf->degree);
1676: PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1677: for (i=0; i<nroots; i++) sf->degree[i] = 0;
1678: for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1679: PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1680: }
1681: *degree = NULL;
1682: return 0;
1683: }
1685: /*@C
1686: PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()
1688: Collective
1690: Input Parameter:
1691: . sf - star forest
1693: Output Parameter:
1694: . degree - degree of each root vertex
1696: Level: developer
1698: Notes:
1699: The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1701: .seealso: PetscSFComputeDegreeBegin()
1702: @*/
1703: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
1704: {
1706: PetscSFCheckGraphSet(sf,1);
1708: if (!sf->degreeknown) {
1710: PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1711: PetscFree(sf->degreetmp);
1712: sf->degreeknown = PETSC_TRUE;
1713: }
1714: *degree = sf->degree;
1715: return 0;
1716: }
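/*
  Illustrative usage sketch: computing the degree of each root and using it, e.g. to size a buffer
  that holds one entry per incoming leaf (the layout used by PetscSFGatherBegin()). The degree
  array is owned by the SF and must not be freed by the caller. The helper name is hypothetical.
*/
static PetscErrorCode ExampleRootDegrees(PetscSF sf)
{
  const PetscInt *degree;
  PetscInt        i,nroots,nslots = 0;

  PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
  PetscSFComputeDegreeBegin(sf,&degree);
  PetscSFComputeDegreeEnd(sf,&degree);           /* degree[i] = number of leaves attached to root i */
  for (i=0; i<nroots; i++) nslots += degree[i];  /* total number of multi-root slots on this rank */
  return 0;
}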
1718: /*@C
1719: PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1720: Each multi-root is assigned the index of the corresponding original root.
1722: Collective
1724: Input Parameters:
1725: + sf - star forest
1726: - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1728: Output Parameters:
1729: + nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1730: - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1732: Level: developer
1734: Notes:
1735: The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1737: .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1738: @*/
1739: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1740: {
1741: PetscSF msf;
1742: PetscInt i, j, k, nroots, nmroots;
1745: PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
1749: PetscSFGetMultiSF(sf,&msf);
1750: PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);
1751: PetscMalloc1(nmroots, multiRootsOrigNumbering);
1752: for (i=0,j=0,k=0; i<nroots; i++) {
1753: if (!degree[i]) continue;
1754: for (j=0; j<degree[i]; j++,k++) {
1755: (*multiRootsOrigNumbering)[k] = i;
1756: }
1757: }
1759: if (nMultiRoots) *nMultiRoots = nmroots;
1760: return 0;
1761: }
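/*
  Illustrative usage sketch: mapping multi-roots back to the original roots. After the call,
  multiRootsOrig[k] is the original root index owning the k-th multi-root slot, which is useful
  for accumulating data gathered with PetscSFGatherBegin()/PetscSFGatherEnd() per original root.
  The helper name is hypothetical; the returned array must be freed with PetscFree().
*/
static PetscErrorCode ExampleMultiRootNumbering(PetscSF sf)
{
  const PetscInt *degree;
  PetscInt       *multiRootsOrig,nmroots;

  PetscSFComputeDegreeBegin(sf,&degree);
  PetscSFComputeDegreeEnd(sf,&degree);
  PetscSFComputeMultiRootOriginalNumbering(sf,degree,&nmroots,&multiRootsOrig);
  /* ... use multiRootsOrig[0..nmroots-1] ... */
  PetscFree(multiRootsOrig);
  return 0;
}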
1763: /*@C
1764: PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1766: Collective
1768: Input Parameters:
1769: + sf - star forest
1770: . unit - data type
1771: - leafdata - leaf data to gather to roots
1773: Output Parameter:
1774: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1776: Level: intermediate
1778: .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1779: @*/
1780: PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1781: {
1782: PetscSF multi = NULL;
1785: PetscSFSetUp(sf);
1786: PetscSFGetMultiSF(sf,&multi);
1787: PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);
1788: return 0;
1789: }
1791: /*@C
1792: PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1794: Collective
1796: Input Parameters:
1797: + sf - star forest
1798: . unit - data type
1799: - leafdata - leaf data to gather to roots
1801: Output Parameter:
1802: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1804: Level: intermediate
1806: .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1807: @*/
1808: PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1809: {
1810: PetscSF multi = NULL;
1813: PetscSFGetMultiSF(sf,&multi);
1814: PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);
1815: return 0;
1816: }
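/*
  Illustrative usage sketch: gathering every leaf value to its root without combining them.
  Each root receives degree[i] entries stored contiguously, so the degree is computed first to
  size the multi-root buffer. The helper name is hypothetical; error checking is omitted.
*/
static PetscErrorCode ExampleGatherAllLeaves(PetscSF sf,const PetscScalar *leafdata)
{
  const PetscInt *degree;
  PetscInt        i,nroots,nslots = 0;
  PetscScalar    *multirootdata;

  PetscSFComputeDegreeBegin(sf,&degree);
  PetscSFComputeDegreeEnd(sf,&degree);
  PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
  for (i=0; i<nroots; i++) nslots += degree[i];
  PetscMalloc1(nslots,&multirootdata);
  PetscSFGatherBegin(sf,MPIU_SCALAR,leafdata,multirootdata);
  PetscSFGatherEnd(sf,MPIU_SCALAR,leafdata,multirootdata);
  /* multirootdata now holds one entry per connected leaf, grouped by root */
  PetscFree(multirootdata);
  return 0;
}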
1818: /*@C
1819: PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1821: Collective
1823: Input Parameters:
1824: + sf - star forest
1825: . unit - data type
1826: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1828: Output Parameter:
1829: . leafdata - leaf data to be updated with personal data from each respective root
1831: Level: intermediate
1833: .seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
1834: @*/
1835: PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1836: {
1837: PetscSF multi = NULL;
1840: PetscSFSetUp(sf);
1841: PetscSFGetMultiSF(sf,&multi);
1842: PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE);
1843: return 0;
1844: }
1846: /*@C
1847: PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1849: Collective
1851: Input Parameters:
1852: + sf - star forest
1853: . unit - data type
1854: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1856: Output Parameter:
1857: . leafdata - leaf data to be updated with personal data from each respective root
1859: Level: intermediate
1861: .seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd()
1862: @*/
1863: PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1864: {
1865: PetscSF multi = NULL;
1868: PetscSFGetMultiSF(sf,&multi);
1869: PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE);
1870: return 0;
1871: }
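/*
  Illustrative usage sketch: the counterpart of the gather above. Each leaf receives its own
  personal slot of the multi-root buffer (one unit per leaf), in contrast to PetscSFBcastBegin(),
  which would send the same root value to all leaves of that root. The helper name is hypothetical.
*/
static PetscErrorCode ExampleScatterPersonal(PetscSF sf,const PetscScalar *multirootdata,PetscScalar *leafdata)
{
  PetscSFScatterBegin(sf,MPIU_SCALAR,multirootdata,leafdata);
  PetscSFScatterEnd(sf,MPIU_SCALAR,multirootdata,leafdata);
  return 0;
}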
1873: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1874: {
1875: PetscInt i, n, nleaves;
1876: const PetscInt *ilocal = NULL;
1877: PetscHSetI seen;
1879: if (PetscDefined(USE_DEBUG)) {
1880: PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);
1881: PetscHSetICreate(&seen);
1882: for (i = 0; i < nleaves; i++) {
1883: const PetscInt leaf = ilocal ? ilocal[i] : i;
1884: PetscHSetIAdd(seen,leaf);
1885: }
1886: PetscHSetIGetSize(seen,&n);
1887: if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Provided leaves have repeated values: all leaves must be unique");
1888: PetscHSetIDestroy(&seen);
1889: }
1890: return 0;
1891: }
1893: /*@
1894: PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view
1896: Input Parameters:
1897: + sfA - The first PetscSF
1898: - sfB - The second PetscSF
1900: Output Parameter:
1901: . sfBA - The composite SF
1903: Level: developer
1905: Notes:
1906: Currently, the two SFs must be defined on congruent communicators and they must be true star
1907: forests, i.e. the same leaf is not connected with different roots.
1909: sfA's leaf space and sfB's root space may partially overlap. The composition builds
1910: a graph with sfA's roots and sfB's leaves only when there is a path between them; unconnected
1911: nodes (roots or leaves) are not in sfBA. On the connected nodes, doing a Bcast on the new SF is
1912: equivalent to doing a Bcast on sfA followed by a Bcast on sfB.
1914: .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
1915: @*/
1916: PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1917: {
1918: const PetscSFNode *remotePointsA,*remotePointsB;
1919: PetscSFNode *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
1920: const PetscInt *localPointsA,*localPointsB;
1921: PetscInt *localPointsBA;
1922: PetscInt i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
1923: PetscBool denseB;
1926: PetscSFCheckGraphSet(sfA,1);
1928: PetscSFCheckGraphSet(sfB,2);
1931: PetscSFCheckLeavesUnique_Private(sfA);
1932: PetscSFCheckLeavesUnique_Private(sfB);
1934: PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);
1935: PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);
1936: if (localPointsA) {
1937: PetscMalloc1(numRootsB,&reorderedRemotePointsA);
1938: for (i=0; i<numRootsB; i++) {
1939: reorderedRemotePointsA[i].rank = -1;
1940: reorderedRemotePointsA[i].index = -1;
1941: }
1942: for (i=0; i<numLeavesA; i++) {
1943: if (localPointsA[i] >= numRootsB) continue;
1944: reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
1945: }
1946: remotePointsA = reorderedRemotePointsA;
1947: }
1948: PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);
1949: PetscMalloc1(maxleaf-minleaf+1,&leafdataB);
1950: PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);
1951: PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);
1952: PetscFree(reorderedRemotePointsA);
1954: denseB = (PetscBool)!localPointsB;
1955: for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1956: if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
1957: else numLeavesBA++;
1958: }
1959: if (denseB) {
1960: localPointsBA = NULL;
1961: remotePointsBA = leafdataB;
1962: } else {
1963: PetscMalloc1(numLeavesBA,&localPointsBA);
1964: PetscMalloc1(numLeavesBA,&remotePointsBA);
1965: for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1966: const PetscInt l = localPointsB ? localPointsB[i] : i;
1968: if (leafdataB[l-minleaf].rank == -1) continue;
1969: remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
1970: localPointsBA[numLeavesBA] = l;
1971: numLeavesBA++;
1972: }
1973: PetscFree(leafdataB);
1974: }
1975: PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
1976: PetscSFSetFromOptions(*sfBA);
1977: PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
1978: return 0;
1979: }
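/*
  Illustrative usage sketch: after composing, a single Bcast on sfBA moves data from sfA's roots
  directly to sfB's leaves, matching the note above (a Bcast on sfA followed by a Bcast on sfB on
  the connected nodes). The helper name and buffers are hypothetical; leafdataB is assumed to be
  laid out like sfB's leaf space.
*/
static PetscErrorCode ExampleComposedBcast(PetscSF sfA,PetscSF sfB,const PetscScalar *rootdataA,PetscScalar *leafdataB)
{
  PetscSF sfBA;

  PetscSFCompose(sfA,sfB,&sfBA);
  PetscSFBcastBegin(sfBA,MPIU_SCALAR,rootdataA,leafdataB,MPI_REPLACE);
  PetscSFBcastEnd(sfBA,MPIU_SCALAR,rootdataA,leafdataB,MPI_REPLACE);
  PetscSFDestroy(&sfBA);
  return 0;
}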
1981: /*@
1982: PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one
1984: Input Parameters:
1985: + sfA - The first PetscSF
1986: - sfB - The second PetscSF
1988: Output Parameter:
1989: . sfBA - The composite SF.
1991: Level: developer
1993: Notes:
1994: Currently, the two SFs must be defined on congruent communicators and they must be true star
1995: forests, i.e. the same leaf is not connected with different roots. Moreover, all roots of the
1996: second SF must have a degree of 1, i.e., no root has more than one leaf connected to it.
1998: sfA's leaf space and sfB's leaf space may partially overlap. The composition builds
1999: a graph with sfA's roots and sfB's roots only when there is a path between them; unconnected
2000: roots are not in sfBA. On the connected roots, doing a Bcast on the new SF is equivalent to
2001: doing a Bcast on sfA followed by a Reduce on sfB.
2003: .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
2004: @*/
2005: PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
2006: {
2007: const PetscSFNode *remotePointsA,*remotePointsB;
2008: PetscSFNode *remotePointsBA;
2009: const PetscInt *localPointsA,*localPointsB;
2010: PetscSFNode *reorderedRemotePointsA = NULL;
2011: PetscInt i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
2012: MPI_Op op;
2013: #if defined(PETSC_USE_64BIT_INDICES)
2014: PetscBool iswin;
2015: #endif
2018: PetscSFCheckGraphSet(sfA,1);
2020: PetscSFCheckGraphSet(sfB,2);
2023: PetscSFCheckLeavesUnique_Private(sfA);
2024: PetscSFCheckLeavesUnique_Private(sfB);
2026: PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);
2027: PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);
2029: /* TODO: Check roots of sfB have degree of 1 */
2030: /* Once we implement that check, we can replace MPI_MAXLOC
2031: with MPI_REPLACE; when the condition holds, the two have the same effect.
2032: We use MPI_MAXLOC only to obtain a deterministic output from this routine if
2033: the root condition is not met.
2034: */
2035: op = MPI_MAXLOC;
2036: #if defined(PETSC_USE_64BIT_INDICES)
2037: /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2038: PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);
2039: if (iswin) op = MPI_REPLACE;
2040: #endif
2042: PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);
2043: PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);
2044: for (i=0; i<maxleaf - minleaf + 1; i++) {
2045: reorderedRemotePointsA[i].rank = -1;
2046: reorderedRemotePointsA[i].index = -1;
2047: }
2048: if (localPointsA) {
2049: for (i=0; i<numLeavesA; i++) {
2050: if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2051: reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2052: }
2053: } else {
2054: for (i=0; i<numLeavesA; i++) {
2055: if (i > maxleaf || i < minleaf) continue;
2056: reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2057: }
2058: }
2060: PetscMalloc1(numRootsB,&localPointsBA);
2061: PetscMalloc1(numRootsB,&remotePointsBA);
2062: for (i=0; i<numRootsB; i++) {
2063: remotePointsBA[i].rank = -1;
2064: remotePointsBA[i].index = -1;
2065: }
2067: PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
2068: PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
2069: PetscFree(reorderedRemotePointsA);
2070: for (i=0,numLeavesBA=0; i<numRootsB; i++) {
2071: if (remotePointsBA[i].rank == -1) continue;
2072: remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
2073: remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2074: localPointsBA[numLeavesBA] = i;
2075: numLeavesBA++;
2076: }
2077: PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
2078: PetscSFSetFromOptions(*sfBA);
2079: PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
2080: return 0;
2081: }
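/*
  Illustrative usage sketch: with PetscSFComposeInverse(), the leaves of sfBA index into sfB's
  root space, so a Bcast on sfBA pushes data from sfA's roots into a buffer laid out like sfB's
  roots. This presumes every root of sfB has at most one leaf, as required above. The helper name
  and buffers are hypothetical.
*/
static PetscErrorCode ExampleComposeInverseBcast(PetscSF sfA,PetscSF sfB,const PetscScalar *rootdataA,PetscScalar *rootdataB)
{
  PetscSF sfBA;

  PetscSFComposeInverse(sfA,sfB,&sfBA);
  PetscSFBcastBegin(sfBA,MPIU_SCALAR,rootdataA,rootdataB,MPI_REPLACE);
  PetscSFBcastEnd(sfBA,MPIU_SCALAR,rootdataA,rootdataB,MPI_REPLACE);
  PetscSFDestroy(&sfBA);
  return 0;
}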
2083: /*
2084: PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
2086: Input Parameters:
2087: . sf - The global PetscSF
2089: Output Parameters:
2090: . out - The local PetscSF
2091: */
2092: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
2093: {
2094: MPI_Comm comm;
2095: PetscMPIInt myrank;
2096: const PetscInt *ilocal;
2097: const PetscSFNode *iremote;
2098: PetscInt i,j,nroots,nleaves,lnleaves,*lilocal;
2099: PetscSFNode *liremote;
2100: PetscSF lsf;
2103: if (sf->ops->CreateLocalSF) {
2104: (*sf->ops->CreateLocalSF)(sf,out);
2105: } else {
2106: /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2107: PetscObjectGetComm((PetscObject)sf,&comm);
2108: MPI_Comm_rank(comm,&myrank);
2110: /* Find out local edges and build a local SF */
2111: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
2112: for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2113: PetscMalloc1(lnleaves,&lilocal);
2114: PetscMalloc1(lnleaves,&liremote);
2116: for (i=j=0; i<nleaves; i++) {
2117: if (iremote[i].rank == (PetscInt)myrank) {
2118: lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2119: liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
2120: liremote[j].index = iremote[i].index;
2121: j++;
2122: }
2123: }
2124: PetscSFCreate(PETSC_COMM_SELF,&lsf);
2125: PetscSFSetFromOptions(lsf);
2126: PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);
2127: PetscSFSetUp(lsf);
2128: *out = lsf;
2129: }
2130: return 0;
2131: }
2133: /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2134: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2135: {
2136: PetscMemType rootmtype,leafmtype;
2139: PetscSFSetUp(sf);
2140: PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);
2141: PetscGetMemType(rootdata,&rootmtype);
2142: PetscGetMemType(leafdata,&leafmtype);
2143: if (sf->ops->BcastToZero) {
2144: (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);
2145: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2146: PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);
2147: return 0;
2148: }
2150: /*@
2151: PetscSFConcatenate - concatenate multiple SFs into one
2153: Input Parameters:
2154: + comm - the communicator
2155: . nsfs - the number of input PetscSF
2156: . sfs - the array of input PetscSF
2157: . shareRoots - flag indicating whether the roots of the input PetscSFs are taken as shared (PETSC_TRUE) or separate and concatenated (PETSC_FALSE)
2158: - leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage
2160: Output Parameter:
2161: . newsf - The resulting PetscSF
2163: Level: developer
2165: Notes:
2166: The communicator of all SFs in sfs must be comm.
2168: The offsets in leafOffsets are added to the original leaf indices.
2170: If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
2171: In this case, NULL is also passed as ilocal to the resulting SF.
2173: If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
2174: In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2176: .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph()
2177: @*/
2178: PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf)
2179: {
2180: PetscInt i, s, nLeaves, nRoots;
2181: PetscInt *leafArrayOffsets;
2182: PetscInt *ilocal_new;
2183: PetscSFNode *iremote_new;
2184: PetscInt *rootOffsets;
2185: PetscBool all_ilocal_null = PETSC_FALSE;
2186: PetscMPIInt rank;
2188: {
2189: PetscSF dummy; /* just to have a PetscObject on comm for input validation */
2191: PetscSFCreate(comm,&dummy);
2194: for (i=0; i<nsfs; i++) {
2197: }
2201: PetscSFDestroy(&dummy);
2202: }
2203: if (!nsfs) {
2204: PetscSFCreate(comm, newsf);
2205: PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER);
2206: return 0;
2207: }
2208: MPI_Comm_rank(comm, &rank);
2210: PetscCalloc1(nsfs+1, &rootOffsets);
2211: if (shareRoots) {
2212: PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL);
2213: if (PetscDefined(USE_DEBUG)) {
2214: for (s=1; s<nsfs; s++) {
2215: PetscInt nr;
2217: PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL);
2219: }
2220: }
2221: } else {
2222: for (s=0; s<nsfs; s++) {
2223: PetscInt nr;
2225: PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL);
2226: rootOffsets[s+1] = rootOffsets[s] + nr;
2227: }
2228: nRoots = rootOffsets[nsfs];
2229: }
2231: /* Calculate leaf array offsets */
2232: PetscMalloc1(nsfs+1,&leafArrayOffsets);
2233: leafArrayOffsets[0] = 0;
2234: for (s=0; s<nsfs; s++) {
2235: PetscInt nl;
2237: PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL);
2238: leafArrayOffsets[s+1] = leafArrayOffsets[s] + nl;
2239: }
2240: nLeaves = leafArrayOffsets[nsfs];
2242: if (!leafOffsets) {
2243: all_ilocal_null = PETSC_TRUE;
2244: for (s=0; s<nsfs; s++) {
2245: const PetscInt *ilocal;
2247: PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL);
2248: if (ilocal) {
2249: all_ilocal_null = PETSC_FALSE;
2250: break;
2251: }
2252: }
2254: }
2256: /* Renumber and concatenate local leaves */
2257: ilocal_new = NULL;
2258: if (!all_ilocal_null) {
2259: PetscMalloc1(nLeaves, &ilocal_new);
2260: for (i = 0; i<nLeaves; i++) ilocal_new[i] = -1;
2261: for (s = 0; s<nsfs; s++) {
2262: const PetscInt *ilocal;
2263: PetscInt *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
2264: PetscInt i, nleaves_l;
2266: PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL);
2267: for (i=0; i<nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2268: }
2269: }
2271: /* Renumber and concatenate remote roots */
2272: PetscMalloc1(nLeaves, &iremote_new);
2273: for (i = 0; i < nLeaves; i++) {
2274: iremote_new[i].rank = -1;
2275: iremote_new[i].index = -1;
2276: }
2277: for (s = 0; s<nsfs; s++) {
2278: PetscInt i, nl, nr;
2279: PetscSF tmp_sf;
2280: const PetscSFNode *iremote;
2281: PetscSFNode *tmp_rootdata;
2282: PetscSFNode *tmp_leafdata = &iremote_new[leafArrayOffsets[s]];
2284: PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote);
2285: PetscSFCreate(comm, &tmp_sf);
2286: /* create helper SF with contiguous leaves */
2287: PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode*) iremote, PETSC_COPY_VALUES);
2288: PetscSFSetUp(tmp_sf);
2289: PetscMalloc1(nr, &tmp_rootdata);
2290: for (i = 0; i < nr; i++) {
2291: tmp_rootdata[i].index = i + rootOffsets[s];
2292: tmp_rootdata[i].rank = (PetscInt) rank;
2293: }
2294: PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE);
2295: PetscSFBcastEnd( tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE);
2296: PetscSFDestroy(&tmp_sf);
2297: PetscFree(tmp_rootdata);
2298: }
2300: /* Build the new SF */
2301: PetscSFCreate(comm, newsf);
2302: PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER);
2303: PetscSFSetUp(*newsf);
2304: PetscFree(rootOffsets);
2305: PetscFree(leafArrayOffsets);
2306: return 0;
2307: }
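/*
  Illustrative usage sketch: concatenating two SFs that both use contiguous leaf storage
  (ilocal == NULL), so leafOffsets may be NULL as well. With shareRoots == PETSC_FALSE the root
  spaces are stacked one after the other; with PETSC_TRUE all inputs are assumed to address the
  same root space. The helper name is hypothetical.
*/
static PetscErrorCode ExampleConcatenateTwo(MPI_Comm comm,PetscSF sf0,PetscSF sf1,PetscSF *merged)
{
  PetscSF sfs[2];

  sfs[0] = sf0;
  sfs[1] = sf1;
  PetscSFConcatenate(comm,2,sfs,PETSC_FALSE,NULL,merged);
  return 0;
}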