Actual source code: sf.c
1: #include <petsc/private/sfimpl.h>
2: #include <petsc/private/hashseti.h>
3: #include <petsc/private/viewerimpl.h>
4: #include <petsc/private/hashmapi.h>
6: #if defined(PETSC_HAVE_CUDA)
7: #include <cuda_runtime.h>
8: #include <petscdevice_cuda.h>
9: #endif
11: #if defined(PETSC_HAVE_HIP)
12: #include <hip/hip_runtime.h>
13: #endif
15: #if defined(PETSC_CLANG_STATIC_ANALYZER)
16: extern void PetscSFCheckGraphSet(PetscSF, int);
17: #else
18: #if defined(PETSC_USE_DEBUG)
19: #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
20: #else
21: #define PetscSFCheckGraphSet(sf, arg) \
22: do { \
23: } while (0)
24: #endif
25: #endif
27: const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
28: const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};
30: /*@
31: PetscSFCreate - create a star forest communication context
33: Collective
35: Input Parameter:
36: . comm - communicator on which the star forest will operate
38: Output Parameter:
39: . sf - new star forest context
41: Options Database Keys:
42: + -sf_type basic - Use MPI persistent Isend/Irecv for communication (Default)
43: . -sf_type window - Use MPI-3 one-sided window for communication
44: . -sf_type neighbor - Use MPI-3 neighborhood collectives for communication
45: - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor)
47: Level: intermediate
49: Note:
50: When the communication graph is known to be one of the predefined patterns, such as `MPI_Alltoall()`, `MPI_Allgatherv()`, or
51: `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
52: `SF`s are optimized and perform better than general `SF`s.
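   Example Usage:
   A minimal sketch of creating a patterned `PetscSF`; it assumes `n`, the local number of roots, is already defined and omits everything except the `PetscSF`/`PetscLayout` calls:
.vb
  #include <petscsf.h>

  PetscSF     sf;
  PetscLayout map;

  // Describe how the roots (e.g. the entries of an MPI vector) are distributed across processes
  PetscCall(PetscLayoutCreate(PETSC_COMM_WORLD, &map));
  PetscCall(PetscLayoutSetLocalSize(map, n));
  PetscCall(PetscLayoutSetUp(map));

  // Create the star forest and give it the predefined allgather pattern
  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  PetscCall(PetscSFSetGraphWithPattern(sf, map, PETSCSF_PATTERN_ALLGATHER));
  PetscCall(PetscSFSetUp(sf));

  // ... communicate with PetscSFBcastBegin()/PetscSFBcastEnd() etc. ...

  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscLayoutDestroy(&map));
.ve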
54: .seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
55: @*/
56: PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
57: {
58: PetscSF b;
60: PetscFunctionBegin;
61: PetscAssertPointer(sf, 2);
62: PetscCall(PetscSFInitializePackage());
64: PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
66: b->nroots = -1;
67: b->nleaves = -1;
68: b->minleaf = PETSC_MAX_INT;
69: b->maxleaf = PETSC_MIN_INT;
70: b->nranks = -1;
71: b->rankorder = PETSC_TRUE;
72: b->ingroup = MPI_GROUP_NULL;
73: b->outgroup = MPI_GROUP_NULL;
74: b->graphset = PETSC_FALSE;
75: #if defined(PETSC_HAVE_DEVICE)
76: b->use_gpu_aware_mpi = use_gpu_aware_mpi;
77: b->use_stream_aware_mpi = PETSC_FALSE;
78: b->unknown_input_stream = PETSC_FALSE;
79: #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
80: b->backend = PETSCSF_BACKEND_KOKKOS;
81: #elif defined(PETSC_HAVE_CUDA)
82: b->backend = PETSCSF_BACKEND_CUDA;
83: #elif defined(PETSC_HAVE_HIP)
84: b->backend = PETSCSF_BACKEND_HIP;
85: #endif
87: #if defined(PETSC_HAVE_NVSHMEM)
88: b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */
89: b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
90: PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
91: PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
92: #endif
93: #endif
94: b->vscat.from_n = -1;
95: b->vscat.to_n = -1;
96: b->vscat.unit = MPIU_SCALAR;
97: *sf = b;
98: PetscFunctionReturn(PETSC_SUCCESS);
99: }
101: /*@
102: PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
104: Collective
106: Input Parameter:
107: . sf - star forest
109: Level: advanced
111: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
112: @*/
113: PetscErrorCode PetscSFReset(PetscSF sf)
114: {
115: PetscFunctionBegin;
117: PetscTryTypeMethod(sf, Reset);
118: PetscCall(PetscSFDestroy(&sf->rankssf));
120: sf->nroots = -1;
121: sf->nleaves = -1;
122: sf->minleaf = PETSC_MAX_INT;
123: sf->maxleaf = PETSC_MIN_INT;
124: sf->mine = NULL;
125: sf->remote = NULL;
126: sf->graphset = PETSC_FALSE;
127: PetscCall(PetscFree(sf->mine_alloc));
128: PetscCall(PetscFree(sf->remote_alloc));
129: sf->nranks = -1;
130: PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
131: sf->degreeknown = PETSC_FALSE;
132: PetscCall(PetscFree(sf->degree));
133: if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
134: if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
136: if (sf->multi) sf->multi->multi = NULL;
137: PetscCall(PetscSFDestroy(&sf->multi));
139: PetscCall(PetscLayoutDestroy(&sf->map));
141: #if defined(PETSC_HAVE_DEVICE)
142: for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
143: #endif
145: sf->setupcalled = PETSC_FALSE;
146: PetscFunctionReturn(PETSC_SUCCESS);
147: }
149: /*@C
150: PetscSFSetType - Set the `PetscSF` communication implementation
152: Collective
154: Input Parameters:
155: + sf - the `PetscSF` context
156: - type - a known method
157: .vb
158: PETSCSFWINDOW - MPI-2/3 one-sided
159: PETSCSFBASIC - basic implementation using MPI-1 two-sided
160: .ve
162: Options Database Key:
163: . -sf_type <type> - Sets the method, for example `basic` or `window`; use -help for a list of available methods
165: Level: intermediate
167: Notes:
168: See `PetscSFType` for possible values
170: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`
171: @*/
172: PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
173: {
174: PetscBool match;
175: PetscErrorCode (*r)(PetscSF);
177: PetscFunctionBegin;
179: PetscAssertPointer(type, 2);
181: PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
182: if (match) PetscFunctionReturn(PETSC_SUCCESS);
184: PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
185: PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
186: /* Destroy the previous PetscSF implementation context */
187: PetscTryTypeMethod(sf, Destroy);
188: PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
189: PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
190: PetscCall((*r)(sf));
191: PetscFunctionReturn(PETSC_SUCCESS);
192: }
194: /*@C
195: PetscSFGetType - Get the `PetscSF` communication implementation
197: Not Collective
199: Input Parameter:
200: . sf - the `PetscSF` context
202: Output Parameter:
203: . type - the `PetscSF` type name
205: Level: intermediate
207: .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
208: @*/
209: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
210: {
211: PetscFunctionBegin;
213: PetscAssertPointer(type, 2);
214: *type = ((PetscObject)sf)->type_name;
215: PetscFunctionReturn(PETSC_SUCCESS);
216: }
218: /*@C
219: PetscSFDestroy - destroy a star forest
221: Collective
223: Input Parameter:
224: . sf - address of star forest
226: Level: intermediate
228: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
229: @*/
230: PetscErrorCode PetscSFDestroy(PetscSF *sf)
231: {
232: PetscFunctionBegin;
233: if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
235: if (--((PetscObject)*sf)->refct > 0) {
236: *sf = NULL;
237: PetscFunctionReturn(PETSC_SUCCESS);
238: }
239: PetscCall(PetscSFReset(*sf));
240: PetscTryTypeMethod(*sf, Destroy);
241: PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
242: if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
243: #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
244: if ((*sf)->use_stream_aware_mpi) {
245: PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream));
246: PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm));
247: }
248: #endif
249: PetscCall(PetscHeaderDestroy(sf));
250: PetscFunctionReturn(PETSC_SUCCESS);
251: }
253: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
254: {
255: PetscInt i, nleaves;
256: PetscMPIInt size;
257: const PetscInt *ilocal;
258: const PetscSFNode *iremote;
260: PetscFunctionBegin;
261: if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
262: PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
263: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
264: for (i = 0; i < nleaves; i++) {
265: const PetscInt rank = iremote[i].rank;
266: const PetscInt remote = iremote[i].index;
267: const PetscInt leaf = ilocal ? ilocal[i] : i;
268: PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
269: PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
270: PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
271: }
272: PetscFunctionReturn(PETSC_SUCCESS);
273: }
275: /*@
276: PetscSFSetUp - set up communication structures for a `PetscSF`; after this is done the `PetscSF` may be used to perform communication
278: Collective
280: Input Parameter:
281: . sf - star forest communication object
283: Level: beginner
285: .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
286: @*/
287: PetscErrorCode PetscSFSetUp(PetscSF sf)
288: {
289: PetscFunctionBegin;
291: PetscSFCheckGraphSet(sf, 1);
292: if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS);
293: PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
294: PetscCall(PetscSFCheckGraphValid_Private(sf));
295: if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
296: PetscTryTypeMethod(sf, SetUp);
297: #if defined(PETSC_HAVE_CUDA)
298: if (sf->backend == PETSCSF_BACKEND_CUDA) {
299: sf->ops->Malloc = PetscSFMalloc_CUDA;
300: sf->ops->Free = PetscSFFree_CUDA;
301: }
302: #endif
303: #if defined(PETSC_HAVE_HIP)
304: if (sf->backend == PETSCSF_BACKEND_HIP) {
305: sf->ops->Malloc = PetscSFMalloc_HIP;
306: sf->ops->Free = PetscSFFree_HIP;
307: }
308: #endif
310: #if defined(PETSC_HAVE_KOKKOS)
311: if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
312: sf->ops->Malloc = PetscSFMalloc_Kokkos;
313: sf->ops->Free = PetscSFFree_Kokkos;
314: }
315: #endif
316: PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
317: sf->setupcalled = PETSC_TRUE;
318: PetscFunctionReturn(PETSC_SUCCESS);
319: }
321: /*@
322: PetscSFSetFromOptions - set `PetscSF` options using the options database
324: Logically Collective
326: Input Parameter:
327: . sf - star forest
329: Options Database Keys:
330: + -sf_type - implementation type, see `PetscSFSetType()`
331: . -sf_rank_order - sort composite points for gathers and scatters in rank order; otherwise gathers are non-deterministic
332: . -sf_unknown_input_stream - Assume the input root/leafdata is computed on arbitrary streams unknown to `PetscSF`, so that `PetscSF`
333: performs the extra stream synchronization needed before accessing the data (default: false). This option is only
334: meaningful when PETSc is configured with device support.
335: . -sf_use_stream_aware_mpi - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
336: If true, this option only works with `-use_gpu_aware_mpi 1`.
338: - -sf_backend cuda | hip | kokkos - Select the device backend SF uses. Currently `PetscSF` has these backends: cuda, hip and kokkos.
339: On CUDA (HIP) devices, one can choose cuda (hip) or kokkos, with the default being kokkos. On other devices,
340: kokkos is the only available backend. See the example below.
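   For example, a hypothetical application `./myapp` could select the neighborhood-collective implementation together with the Kokkos backend from the command line:
.vb
  mpiexec -n 4 ./myapp -sf_type neighbor -sf_backend kokkos
.ve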
342: Level: intermediate
344: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
345: @*/
346: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
347: {
348: PetscSFType deft;
349: char type[256];
350: PetscBool flg;
352: PetscFunctionBegin;
354: PetscObjectOptionsBegin((PetscObject)sf);
355: deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
356: PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
357: PetscCall(PetscSFSetType(sf, flg ? type : deft));
358: PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
359: #if defined(PETSC_HAVE_DEVICE)
360: {
361: char backendstr[32] = {0};
362: PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
363: /* Change the defaults set in PetscSFCreate() with command line options */
364: PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
365: PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
366: PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
367: PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
368: PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
369: PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
370: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
371: if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
372: else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
373: else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
374: else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
375: #elif defined(PETSC_HAVE_KOKKOS)
376: PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
377: #endif
379: #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
380: if (sf->use_stream_aware_mpi) {
381: MPI_Info info;
383: PetscCallMPI(MPI_Info_create(&info));
384: PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
385: PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
386: PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
387: PetscCallMPI(MPI_Info_free(&info));
388: PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
389: }
390: #endif
391: }
392: #endif
393: PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
394: PetscOptionsEnd();
395: PetscFunctionReturn(PETSC_SUCCESS);
396: }
398: /*@
399: PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
401: Logically Collective
403: Input Parameters:
404: + sf - star forest
405: - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)
407: Level: advanced
409: .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
410: @*/
411: PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
412: {
413: PetscFunctionBegin;
416: PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
417: sf->rankorder = flg;
418: PetscFunctionReturn(PETSC_SUCCESS);
419: }
421: /*@C
422: PetscSFSetGraph - Set a parallel star forest
424: Collective
426: Input Parameters:
427: + sf - star forest
428: . nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
429: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
430: . ilocal - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced
431: during setup in debug mode)
432: . localmode - copy mode for `ilocal`
433: . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
434: during setup in debug mode)
435: - remotemode - copy mode for `iremote`
437: Level: intermediate
439: Notes:
440: Leaf indices in `ilocal` must be unique; otherwise an error occurs.
442: Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
443: In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
444: PETSc might modify the respective array;
445: with `PETSC_USE_POINTER`, the user must free the array, but only after `PetscSFDestroy()`.
446: Only with `PETSC_COPY_VALUES` is the respective array guaranteed to stay intact, so only then may a const array be passed (though a cast to non-const is needed).
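   Example Usage:
   A minimal sketch in which each process owns one root and one leaf, and the leaf references root 0 on the next process (a ring); because `remotemode` is `PETSC_COPY_VALUES`, stack storage for the remote description is safe here:
.vb
  #include <petscsf.h>

  PetscSF     sf;
  PetscSFNode remote;
  PetscMPIInt rank, size;

  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  remote.rank  = (rank + 1) % size; // MPI rank owning the referenced root
  remote.index = 0;                 // root index on that rank

  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  // One root, one leaf; NULL ilocal means the leaf is stored contiguously (at location 0)
  PetscCall(PetscSFSetGraph(sf, 1, 1, NULL, PETSC_OWN_POINTER, &remote, PETSC_COPY_VALUES));
  PetscCall(PetscSFSetUp(sf));
.ve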
448: Fortran Notes:
449: In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.
451: Developer Notes:
452: We sort the leaves to check for duplicates and contiguity and to find minleaf/maxleaf.
453: This also makes it easy to compare the leaf sets of two `PetscSF`s.
455: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
456: @*/
457: PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode)
458: {
459: PetscBool unique, contiguous;
461: PetscFunctionBegin;
463: if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
464: if (nleaves > 0) PetscAssertPointer(iremote, 6);
465: PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
466: PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
467: /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
468: * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
469: PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
470: PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);
472: if (sf->nroots >= 0) { /* Reset only if graph already set */
473: PetscCall(PetscSFReset(sf));
474: }
476: PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));
478: sf->nroots = nroots;
479: sf->nleaves = nleaves;
481: if (localmode == PETSC_COPY_VALUES && ilocal) {
482: PetscInt *tlocal = NULL;
484: PetscCall(PetscMalloc1(nleaves, &tlocal));
485: PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
486: ilocal = tlocal;
487: }
488: if (remotemode == PETSC_COPY_VALUES) {
489: PetscSFNode *tremote = NULL;
491: PetscCall(PetscMalloc1(nleaves, &tremote));
492: PetscCall(PetscArraycpy(tremote, iremote, nleaves));
493: iremote = tremote;
494: }
496: if (nleaves && ilocal) {
497: PetscSFNode work;
499: PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
500: PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
501: unique = PetscNot(unique);
502: PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
503: sf->minleaf = ilocal[0];
504: sf->maxleaf = ilocal[nleaves - 1];
505: contiguous = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
506: } else {
507: sf->minleaf = 0;
508: sf->maxleaf = nleaves - 1;
509: unique = PETSC_TRUE;
510: contiguous = PETSC_TRUE;
511: }
513: if (contiguous) {
514: if (localmode == PETSC_USE_POINTER) {
515: ilocal = NULL;
516: } else {
517: PetscCall(PetscFree(ilocal));
518: }
519: }
520: sf->mine = ilocal;
521: if (localmode == PETSC_USE_POINTER) {
522: sf->mine_alloc = NULL;
523: } else {
524: sf->mine_alloc = ilocal;
525: }
526: sf->remote = iremote;
527: if (remotemode == PETSC_USE_POINTER) {
528: sf->remote_alloc = NULL;
529: } else {
530: sf->remote_alloc = iremote;
531: }
532: PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
533: sf->graphset = PETSC_TRUE;
534: PetscFunctionReturn(PETSC_SUCCESS);
535: }
537: /*@
538: PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern
540: Collective
542: Input Parameters:
543: + sf - The `PetscSF`
544: . map - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
545: - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`
547: Level: intermediate
549: Notes:
550: It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`.
551: `n` and `N` are the local and global sizes of `x` respectively.
553: With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph such that `PetscSFBcastBegin()`/`PetscSFBcastEnd()` copies `x` to
554: sequential vectors `y` on all MPI processes.
556: With `PETSCSF_PATTERN_GATHER`, the routine creates a graph such that `PetscSFBcastBegin()`/`PetscSFBcastEnd()` copies `x` to a
557: sequential vector `y` on rank 0.
559: In the above cases, entries of `x` are roots and entries of `y` are leaves.
561: With `PETSCSF_PATTERN_ALLTOALL`, `map` is insignificant. Suppose NP is the size of `sf`'s communicator. The routine
562: creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
563: of rank j, for 0 <= i,j < NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount equal to 1. This does
564: not mean one cannot send multiple items; one just needs to create an MPI datatype for the multiple data
565: items with `MPI_Type_contiguous()` and use that as the <unit> argument in SF routines.
567: In this case, roots and leaves are symmetric.
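   Example Usage:
   A minimal sketch of the allgather case; it assumes `comm` is the communicator, `map` is the `PetscLayout` of `x`, `rootdata` is the local `PetscScalar` array of length n, and `leafdata` is a `PetscScalar` array of length N on every process:
.vb
  PetscSF sf;

  PetscCall(PetscSFCreate(comm, &sf));
  PetscCall(PetscSFSetGraphWithPattern(sf, map, PETSCSF_PATTERN_ALLGATHER));
  // After this pair completes, leafdata on every process holds a copy of all N root values
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
.ve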
569: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
570: @*/
571: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
572: {
573: MPI_Comm comm;
574: PetscInt n, N, res[2];
575: PetscMPIInt rank, size;
576: PetscSFType type;
578: PetscFunctionBegin;
580: if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
581: PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
582: PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
583: PetscCallMPI(MPI_Comm_rank(comm, &rank));
584: PetscCallMPI(MPI_Comm_size(comm, &size));
586: if (pattern == PETSCSF_PATTERN_ALLTOALL) {
587: type = PETSCSFALLTOALL;
588: PetscCall(PetscLayoutCreate(comm, &sf->map));
589: PetscCall(PetscLayoutSetLocalSize(sf->map, size));
590: PetscCall(PetscLayoutSetSize(sf->map, ((PetscInt)size) * size));
591: PetscCall(PetscLayoutSetUp(sf->map));
592: } else {
593: PetscCall(PetscLayoutGetLocalSize(map, &n));
594: PetscCall(PetscLayoutGetSize(map, &N));
595: res[0] = n;
596: res[1] = -n;
597: /* Check if n is the same over all ranks so that we can optimize for that case */
598: PetscCall(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
599: if (res[0] == -res[1]) { /* same n */
600: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
601: } else {
602: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
603: }
604: PetscCall(PetscLayoutReference(map, &sf->map));
605: }
606: PetscCall(PetscSFSetType(sf, type));
608: sf->pattern = pattern;
609: sf->mine = NULL; /* Contiguous */
611: /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
612: Also set other easy stuff.
613: */
614: if (pattern == PETSCSF_PATTERN_ALLGATHER) {
615: sf->nleaves = N;
616: sf->nroots = n;
617: sf->nranks = size;
618: sf->minleaf = 0;
619: sf->maxleaf = N - 1;
620: } else if (pattern == PETSCSF_PATTERN_GATHER) {
621: sf->nleaves = rank ? 0 : N;
622: sf->nroots = n;
623: sf->nranks = rank ? 0 : size;
624: sf->minleaf = 0;
625: sf->maxleaf = rank ? -1 : N - 1;
626: } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
627: sf->nleaves = size;
628: sf->nroots = size;
629: sf->nranks = size;
630: sf->minleaf = 0;
631: sf->maxleaf = size - 1;
632: }
633: sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
634: sf->graphset = PETSC_TRUE;
635: PetscFunctionReturn(PETSC_SUCCESS);
636: }
638: /*@
639: PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map
641: Collective
643: Input Parameter:
644: . sf - star forest to invert
646: Output Parameter:
647: . isf - inverse of `sf`
649: Level: advanced
651: Notes:
652: All roots must have degree 1.
654: The local space may be a permutation, but cannot be sparse.
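   Example Usage:
   A minimal sketch; it assumes `sf` is a set-up `PetscSF` whose roots all have degree 1, and that `rootdata`/`leafdata` are `PetscInt` arrays on the root/leaf spaces of `sf`. A broadcast on the inverse moves data from the leaves of `sf` back to its roots:
.vb
  PetscSF isf;

  PetscCall(PetscSFCreateInverseSF(sf, &isf));
  // Roles are swapped: the leaf space of sf is the root space of isf and vice versa
  PetscCall(PetscSFBcastBegin(isf, MPIU_INT, leafdata, rootdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(isf, MPIU_INT, leafdata, rootdata, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&isf));
.ve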
656: .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
657: @*/
658: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
659: {
660: PetscMPIInt rank;
661: PetscInt i, nroots, nleaves, maxlocal, count, *newilocal;
662: const PetscInt *ilocal;
663: PetscSFNode *roots, *leaves;
665: PetscFunctionBegin;
667: PetscSFCheckGraphSet(sf, 1);
668: PetscAssertPointer(isf, 2);
670: PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
671: maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
673: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
674: PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
675: for (i = 0; i < maxlocal; i++) {
676: leaves[i].rank = rank;
677: leaves[i].index = i;
678: }
679: for (i = 0; i < nroots; i++) {
680: roots[i].rank = -1;
681: roots[i].index = -1;
682: }
683: PetscCall(PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));
684: PetscCall(PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));
686: /* Check whether our leaves are sparse */
687: for (i = 0, count = 0; i < nroots; i++)
688: if (roots[i].rank >= 0) count++;
689: if (count == nroots) newilocal = NULL;
690: else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal));
691: for (i = 0, count = 0; i < nroots; i++) {
692: if (roots[i].rank >= 0) {
693: newilocal[count] = i;
694: roots[count].rank = roots[i].rank;
695: roots[count].index = roots[i].index;
696: count++;
697: }
698: }
699: }
701: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
702: PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
703: PetscCall(PetscFree2(roots, leaves));
704: PetscFunctionReturn(PETSC_SUCCESS);
705: }
707: /*@
708: PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph
710: Collective
712: Input Parameters:
713: + sf - communication object to duplicate
714: - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)
716: Output Parameter:
717: . newsf - new communication object
719: Level: beginner
721: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
722: @*/
723: PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
724: {
725: PetscSFType type;
726: MPI_Datatype dtype = MPIU_SCALAR;
728: PetscFunctionBegin;
731: PetscAssertPointer(newsf, 3);
732: PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
733: PetscCall(PetscSFGetType(sf, &type));
734: if (type) PetscCall(PetscSFSetType(*newsf, type));
735: (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
736: if (opt == PETSCSF_DUPLICATE_GRAPH) {
737: PetscSFCheckGraphSet(sf, 1);
738: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
739: PetscInt nroots, nleaves;
740: const PetscInt *ilocal;
741: const PetscSFNode *iremote;
742: PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
743: PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
744: } else {
745: PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
746: }
747: }
748: /* Since oldtype is committed, so is newtype, according to MPI */
749: if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
750: (*newsf)->vscat.bs = sf->vscat.bs;
751: (*newsf)->vscat.unit = dtype;
752: (*newsf)->vscat.to_n = sf->vscat.to_n;
753: (*newsf)->vscat.from_n = sf->vscat.from_n;
754: /* Do not copy lsf. Build it on demand since it is rarely used */
756: #if defined(PETSC_HAVE_DEVICE)
757: (*newsf)->backend = sf->backend;
758: (*newsf)->unknown_input_stream = sf->unknown_input_stream;
759: (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi;
760: (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
761: #endif
762: PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
763: /* Don't do PetscSFSetUp() since the new sf's graph might not have been set. */
764: PetscFunctionReturn(PETSC_SUCCESS);
765: }
767: /*@C
768: PetscSFGetGraph - Get the graph specifying a parallel star forest
770: Not Collective
772: Input Parameter:
773: . sf - star forest
775: Output Parameters:
776: + nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
777: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
778: . ilocal - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage)
779: - iremote - remote locations of root vertices for each leaf on the current process
781: Level: intermediate
783: Notes:
784: We are not currently requiring that the graph be set, thus `nroots` = -1 is returned if it has not been set yet
786: The returned `ilocal` and `iremote` might contain values in a different order than the input ones in `PetscSFSetGraph()`
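   Example Usage:
   A minimal sketch that prints each leaf and the root it references; it assumes `sf` already has its graph set:
.vb
  PetscInt           nleaves;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;

  PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
  for (PetscInt i = 0; i < nleaves; i++) {
    const PetscInt leaf = ilocal ? ilocal[i] : i; // NULL ilocal means leaves are contiguous

    PetscCall(PetscPrintf(PETSC_COMM_SELF, "leaf %" PetscInt_FMT " <- (rank %" PetscInt_FMT ", root %" PetscInt_FMT ")\n", leaf, iremote[i].rank, iremote[i].index));
  }
.ve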
788: Fortran Notes:
789: The returned `iremote` array is a copy and must be deallocated after use. Consequently, if you
790: want to update the graph, you must call `PetscSFSetGraph()` after modifying the `iremote` array.
792: To check for a `NULL` `ilocal` use
793: $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
795: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
796: @*/
797: PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
798: {
799: PetscFunctionBegin;
801: if (sf->ops->GetGraph) {
802: PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote));
803: } else {
804: if (nroots) *nroots = sf->nroots;
805: if (nleaves) *nleaves = sf->nleaves;
806: if (ilocal) *ilocal = sf->mine;
807: if (iremote) *iremote = sf->remote;
808: }
809: PetscFunctionReturn(PETSC_SUCCESS);
810: }
812: /*@
813: PetscSFGetLeafRange - Get the active leaf ranges
815: Not Collective
817: Input Parameter:
818: . sf - star forest
820: Output Parameters:
821: + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
822: - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.
824: Level: developer
826: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
827: @*/
828: PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
829: {
830: PetscFunctionBegin;
832: PetscSFCheckGraphSet(sf, 1);
833: if (minleaf) *minleaf = sf->minleaf;
834: if (maxleaf) *maxleaf = sf->maxleaf;
835: PetscFunctionReturn(PETSC_SUCCESS);
836: }
838: /*@C
839: PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database
841: Collective
843: Input Parameters:
844: + A - the star forest
845: . obj - Optional object that provides the prefix for the option names
846: - name - command line option
848: Level: intermediate
850: Note:
851: See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`
853: .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
854: @*/
855: PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
856: {
857: PetscFunctionBegin;
859: PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
860: PetscFunctionReturn(PETSC_SUCCESS);
861: }
863: /*@C
864: PetscSFView - view a star forest
866: Collective
868: Input Parameters:
869: + sf - star forest
870: - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`
872: Level: beginner
874: .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
875: @*/
876: PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
877: {
878: PetscBool iascii;
879: PetscViewerFormat format;
881: PetscFunctionBegin;
883: if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
885: PetscCheckSameComm(sf, 1, viewer, 2);
886: if (sf->graphset) PetscCall(PetscSFSetUp(sf));
887: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
888: if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
889: PetscMPIInt rank;
890: PetscInt ii, i, j;
892: PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
893: PetscCall(PetscViewerASCIIPushTab(viewer));
894: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
895: if (!sf->graphset) {
896: PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
897: PetscCall(PetscViewerASCIIPopTab(viewer));
898: PetscFunctionReturn(PETSC_SUCCESS);
899: }
900: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
901: PetscCall(PetscViewerASCIIPushSynchronized(viewer));
902: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks));
903: for (i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
904: PetscCall(PetscViewerFlush(viewer));
905: PetscCall(PetscViewerGetFormat(viewer, &format));
906: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
907: PetscMPIInt *tmpranks, *perm;
908: PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
909: PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
910: for (i = 0; i < sf->nranks; i++) perm[i] = i;
911: PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
912: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
913: for (ii = 0; ii < sf->nranks; ii++) {
914: i = perm[ii];
915: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
916: for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
917: }
918: PetscCall(PetscFree2(tmpranks, perm));
919: }
920: PetscCall(PetscViewerFlush(viewer));
921: PetscCall(PetscViewerASCIIPopSynchronized(viewer));
922: }
923: PetscCall(PetscViewerASCIIPopTab(viewer));
924: }
925: PetscTryTypeMethod(sf, View, viewer);
926: PetscFunctionReturn(PETSC_SUCCESS);
927: }
929: /*@C
930: PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
932: Not Collective
934: Input Parameter:
935: . sf - star forest
937: Output Parameters:
938: + nranks - number of ranks referenced by local part
939: . ranks - [`nranks`] array of ranks
940: . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank
941: . rmine - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank
942: - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank
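   Example Usage:
   A minimal sketch of traversing these arrays after `PetscSFSetUp()`; for each connected remote rank i, the slice roffset[i]..roffset[i+1] of `rmine`/`rremote` lists the local leaf indices and the corresponding root indices on that rank:
.vb
  PetscInt           nranks;
  const PetscMPIInt *ranks;
  const PetscInt    *roffset, *rmine, *rremote;

  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &roffset, &rmine, &rremote));
  for (PetscInt i = 0; i < nranks; i++) {
    for (PetscInt j = roffset[i]; j < roffset[i + 1]; j++) {
      // local leaf rmine[j] references root rremote[j] owned by MPI rank ranks[i]
    }
  }
.ve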
944: Level: developer
946: .seealso: `PetscSF`, `PetscSFGetLeafRanks()`
947: @*/
948: PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote)
949: {
950: PetscFunctionBegin;
952: PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
953: if (sf->ops->GetRootRanks) {
954: PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote);
955: } else {
956: /* The generic implementation */
957: if (nranks) *nranks = sf->nranks;
958: if (ranks) *ranks = sf->ranks;
959: if (roffset) *roffset = sf->roffset;
960: if (rmine) *rmine = sf->rmine;
961: if (rremote) *rremote = sf->rremote;
962: }
963: PetscFunctionReturn(PETSC_SUCCESS);
964: }
966: /*@C
967: PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
969: Not Collective
971: Input Parameter:
972: . sf - star forest
974: Output Parameters:
975: + niranks - number of leaf ranks referencing roots on this process
976: . iranks - [`niranks`] array of ranks
977: . ioffset - [`niranks`+1] offset in `irootloc` for each rank
978: - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank
980: Level: developer
982: .seealso: `PetscSF`, `PetscSFGetRootRanks()`
983: @*/
984: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
985: {
986: PetscFunctionBegin;
988: PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
989: if (sf->ops->GetLeafRanks) {
990: PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc);
991: } else {
992: PetscSFType type;
993: PetscCall(PetscSFGetType(sf, &type));
994: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
995: }
996: PetscFunctionReturn(PETSC_SUCCESS);
997: }
999: static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
1000: {
1001: PetscInt i;
1002: for (i = 0; i < n; i++) {
1003: if (needle == list[i]) return PETSC_TRUE;
1004: }
1005: return PETSC_FALSE;
1006: }
1008: /*@C
1009: PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations.
1011: Collective
1013: Input Parameters:
1014: + sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
1015: - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)
1017: Level: developer
1019: .seealso: `PetscSF`, `PetscSFGetRootRanks()`
1020: @*/
1021: PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
1022: {
1023: PetscHMapI table;
1024: PetscHashIter pos;
1025: PetscMPIInt size, groupsize, *groupranks;
1026: PetscInt *rcount, *ranks;
1027: PetscInt i, irank = -1, orank = -1;
1029: PetscFunctionBegin;
1031: PetscSFCheckGraphSet(sf, 1);
1032: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
1033: PetscCall(PetscHMapICreateWithSize(10, &table));
1034: for (i = 0; i < sf->nleaves; i++) {
1035: /* Log 1-based rank */
1036: PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
1037: }
1038: PetscCall(PetscHMapIGetSize(table, &sf->nranks));
1039: PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
1040: PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
1041: PetscHashIterBegin(table, pos);
1042: for (i = 0; i < sf->nranks; i++) {
1043: PetscHashIterGetKey(table, pos, ranks[i]);
1044: PetscHashIterGetVal(table, pos, rcount[i]);
1045: PetscHashIterNext(table, pos);
1046: ranks[i]--; /* Convert back to 0-based */
1047: }
1048: PetscCall(PetscHMapIDestroy(&table));
1050: /* We expect that dgroup is reliably "small" while nranks could be large */
1051: {
1052: MPI_Group group = MPI_GROUP_NULL;
1053: PetscMPIInt *dgroupranks;
1054: PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1055: PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
1056: PetscCall(PetscMalloc1(groupsize, &dgroupranks));
1057: PetscCall(PetscMalloc1(groupsize, &groupranks));
1058: for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
1059: if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
1060: PetscCallMPI(MPI_Group_free(&group));
1061: PetscCall(PetscFree(dgroupranks));
1062: }
1064: /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1065: for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1066: for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1067: if (InList(ranks[i], groupsize, groupranks)) break;
1068: }
1069: for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1070: if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1071: }
1072: if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1073: PetscInt tmprank, tmpcount;
1075: tmprank = ranks[i];
1076: tmpcount = rcount[i];
1077: ranks[i] = ranks[sf->ndranks];
1078: rcount[i] = rcount[sf->ndranks];
1079: ranks[sf->ndranks] = tmprank;
1080: rcount[sf->ndranks] = tmpcount;
1081: sf->ndranks++;
1082: }
1083: }
1084: PetscCall(PetscFree(groupranks));
1085: PetscCall(PetscSortIntWithArray(sf->ndranks, ranks, rcount));
1086: if (rcount) PetscCall(PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
1087: sf->roffset[0] = 0;
1088: for (i = 0; i < sf->nranks; i++) {
1089: PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
1090: sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
1091: rcount[i] = 0;
1092: }
1093: for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1094: /* short circuit */
1095: if (orank != sf->remote[i].rank) {
1096: /* Search for index of iremote[i].rank in sf->ranks */
1097: PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank));
1098: if (irank < 0) {
1099: PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
1100: if (irank >= 0) irank += sf->ndranks;
1101: }
1102: orank = sf->remote[i].rank;
1103: }
1104: PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %" PetscInt_FMT " in array", sf->remote[i].rank);
1105: sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i;
1106: sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1107: rcount[irank]++;
1108: }
1109: PetscCall(PetscFree2(rcount, ranks));
1110: PetscFunctionReturn(PETSC_SUCCESS);
1111: }
1113: /*@C
1114: PetscSFGetGroups - gets incoming and outgoing process groups
1116: Collective
1118: Input Parameter:
1119: . sf - star forest
1121: Output Parameters:
1122: + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1123: - outgoing - group of destination processes for outgoing edges (roots that I reference)
1125: Level: developer
1127: .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
1128: @*/
1129: PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1130: {
1131: MPI_Group group = MPI_GROUP_NULL;
1133: PetscFunctionBegin;
1134: PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
1135: if (sf->ingroup == MPI_GROUP_NULL) {
1136: PetscInt i;
1137: const PetscInt *indegree;
1138: PetscMPIInt rank, *outranks, *inranks;
1139: PetscSFNode *remote;
1140: PetscSF bgcount;
1142: /* Compute the number of incoming ranks */
1143: PetscCall(PetscMalloc1(sf->nranks, &remote));
1144: for (i = 0; i < sf->nranks; i++) {
1145: remote[i].rank = sf->ranks[i];
1146: remote[i].index = 0;
1147: }
1148: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
1149: PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1150: PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
1151: PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
1152: /* Enumerate the incoming ranks */
1153: PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
1154: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1155: for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
1156: PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
1157: PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
1158: PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1159: PetscCallMPI(MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup));
1160: PetscCallMPI(MPI_Group_free(&group));
1161: PetscCall(PetscFree2(inranks, outranks));
1162: PetscCall(PetscSFDestroy(&bgcount));
1163: }
1164: *incoming = sf->ingroup;
1166: if (sf->outgroup == MPI_GROUP_NULL) {
1167: PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1168: PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
1169: PetscCallMPI(MPI_Group_free(&group));
1170: }
1171: *outgoing = sf->outgroup;
1172: PetscFunctionReturn(PETSC_SUCCESS);
1173: }
1175: /*@
1176: PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks
1178: Collective
1180: Input Parameter:
1181: . sf - star forest
1183: Output Parameter:
1184: . rsf - the star forest with a single root per process to perform communications
1186: Level: developer
1188: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()`
1189: @*/
1190: PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf)
1191: {
1192: PetscFunctionBegin;
1194: PetscAssertPointer(rsf, 2);
1195: if (!sf->rankssf) {
1196: PetscSFNode *rremotes;
1197: const PetscMPIInt *ranks;
1198: PetscInt nranks;
1200: PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
1201: PetscCall(PetscMalloc1(nranks, &rremotes));
1202: for (PetscInt i = 0; i < nranks; i++) {
1203: rremotes[i].rank = ranks[i];
1204: rremotes[i].index = 0;
1205: }
1206: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf));
1207: PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER));
1208: }
1209: *rsf = sf->rankssf;
1210: PetscFunctionReturn(PETSC_SUCCESS);
1211: }
1213: /*@
1214: PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters
1216: Collective
1218: Input Parameter:
1219: . sf - star forest that may contain roots with 0 or with more than 1 vertex
1221: Output Parameter:
1222: . multi - star forest with split roots, such that each root has degree exactly 1
1224: Level: developer
1226: Note:
1227: In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
1228: directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1229: edge, it is a candidate for future optimization that might involve its removal.
1231: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
1232: @*/
1233: PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1234: {
1235: PetscFunctionBegin;
1237: PetscAssertPointer(multi, 2);
1238: if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1239: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1240: *multi = sf->multi;
1241: sf->multi->multi = sf->multi;
1242: PetscFunctionReturn(PETSC_SUCCESS);
1243: }
1244: if (!sf->multi) {
1245: const PetscInt *indegree;
1246: PetscInt i, *inoffset, *outones, *outoffset, maxlocal;
1247: PetscSFNode *remote;
1248: maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
1249: PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
1250: PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
1251: PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
1252: inoffset[0] = 0;
1253: for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
1254: for (i = 0; i < maxlocal; i++) outones[i] = 1;
1255: PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1256: PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1257: for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1258: if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
1259: for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
1260: }
1261: PetscCall(PetscMalloc1(sf->nleaves, &remote));
1262: for (i = 0; i < sf->nleaves; i++) {
1263: remote[i].rank = sf->remote[i].rank;
1264: remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1265: }
1266: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1267: sf->multi->multi = sf->multi;
1268: PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1269: if (sf->rankorder) { /* Sort the ranks */
1270: PetscMPIInt rank;
1271: PetscInt *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
1272: PetscSFNode *newremote;
1273: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1274: for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
1275: PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
1276: for (i = 0; i < maxlocal; i++) outranks[i] = rank;
1277: PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1278: PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1279: /* Sort the incoming ranks at each vertex, build the inverse map */
1280: for (i = 0; i < sf->nroots; i++) {
1281: PetscInt j;
1282: for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
1283: PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset));
1284: for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1285: }
1286: PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1287: PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1288: PetscCall(PetscMalloc1(sf->nleaves, &newremote));
1289: for (i = 0; i < sf->nleaves; i++) {
1290: newremote[i].rank = sf->remote[i].rank;
1291: newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1292: }
1293: PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
1294: PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
1295: }
1296: PetscCall(PetscFree3(inoffset, outones, outoffset));
1297: }
1298: *multi = sf->multi;
1299: PetscFunctionReturn(PETSC_SUCCESS);
1300: }
1302: /*@C
1303: PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices
1305: Collective
1307: Input Parameters:
1308: + sf - original star forest
1309: . nselected - number of selected roots on this process
1310: - selected - indices of the selected roots on this process
1312: Output Parameter:
1313: . esf - new star forest
1315: Level: advanced
1317: Note:
1318: To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
1319: be done by calling `PetscSFGetGraph()`.
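   Example Usage:
   A minimal sketch; it assumes `sf` has its graph set, and the selected root indices are only illustrative:
.vb
  PetscSF         esf;
  PetscInt        esf_nleaves;
  const PetscInt  selected[] = {0, 2}; // local root indices to keep (illustrative)
  const PetscInt *esf_ilocal;

  PetscCall(PetscSFCreateEmbeddedRootSF(sf, 2, selected, &esf));
  PetscCall(PetscSFGetGraph(esf, NULL, &esf_nleaves, &esf_ilocal, NULL)); // leaves that still participate, with un-remapped indices
  // ... communicate on esf ...
  PetscCall(PetscSFDestroy(&esf));
.ve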
1321: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1322: @*/
1323: PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
1324: {
1325: PetscInt i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1326: const PetscInt *ilocal;
1327: signed char *rootdata, *leafdata, *leafmem;
1328: const PetscSFNode *iremote;
1329: PetscSFNode *new_iremote;
1330: MPI_Comm comm;
1332: PetscFunctionBegin;
1334: PetscSFCheckGraphSet(sf, 1);
1335: if (nselected) PetscAssertPointer(selected, 3);
1336: PetscAssertPointer(esf, 4);
1338: PetscCall(PetscSFSetUp(sf));
1339: PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
1340: PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1341: PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
1343: if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1344: PetscBool dups;
1345: PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
1346: PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
1347: for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
1348: }
1350: if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1351: else {
1352: /* A generic version of creating embedded sf */
1353: PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
1354: maxlocal = maxleaf - minleaf + 1;
1355: PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
1356: leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
1357: /* Tag selected roots and bcast to leaves */
1358: for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
1359: PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1360: PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1362: /* Build esf with leaves that are still connected */
1363: esf_nleaves = 0;
1364: for (i = 0; i < nleaves; i++) {
1365: j = ilocal ? ilocal[i] : i;
1366: /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1367: with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1368: */
1369: esf_nleaves += (leafdata[j] ? 1 : 0);
1370: }
1371: PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
1372: PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
1373: for (i = n = 0; i < nleaves; i++) {
1374: j = ilocal ? ilocal[i] : i;
1375: if (leafdata[j]) {
1376: new_ilocal[n] = j;
1377: new_iremote[n].rank = iremote[i].rank;
1378: new_iremote[n].index = iremote[i].index;
1379: ++n;
1380: }
1381: }
1382: PetscCall(PetscSFCreate(comm, esf));
1383: PetscCall(PetscSFSetFromOptions(*esf));
1384: PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1385: PetscCall(PetscFree2(rootdata, leafmem));
1386: }
1387: PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
1388: PetscFunctionReturn(PETSC_SUCCESS);
1389: }
1391: /*@C
1392: PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices
1394: Collective
1396: Input Parameters:
1397: + sf - original star forest
1398: . nselected - number of selected leaves on this process
1399: - selected - indices of the selected leaves on this process
1401: Output Parameter:
1402: . newsf - new star forest
1404: Level: advanced
1406: .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1407: @*/
1408: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
1409: {
1410: const PetscSFNode *iremote;
1411: PetscSFNode *new_iremote;
1412: const PetscInt *ilocal;
1413: PetscInt i, nroots, *leaves, *new_ilocal;
1414: MPI_Comm comm;
1416: PetscFunctionBegin;
1418: PetscSFCheckGraphSet(sf, 1);
1419: if (nselected) PetscAssertPointer(selected, 3);
1420: PetscAssertPointer(newsf, 4);
1422: /* Uniq selected[] and put results in leaves[] */
1423: PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1424: PetscCall(PetscMalloc1(nselected, &leaves));
1425: PetscCall(PetscArraycpy(leaves, selected, nselected));
1426: PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves));
1427: PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);
1429: /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1430: if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1431: else {
1432: PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
1433: PetscCall(PetscMalloc1(nselected, &new_ilocal));
1434: PetscCall(PetscMalloc1(nselected, &new_iremote));
1435: for (i = 0; i < nselected; ++i) {
1436: const PetscInt l = leaves[i];
1437: new_ilocal[i] = ilocal ? ilocal[l] : l;
1438: new_iremote[i].rank = iremote[l].rank;
1439: new_iremote[i].index = iremote[l].index;
1440: }
1441: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
1442: PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1443: }
1444: PetscCall(PetscFree(leaves));
1445: PetscFunctionReturn(PETSC_SUCCESS);
1446: }
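/*
   Example usage (a minimal sketch, assuming an existing PetscSF 'sf' with its graph set;
   the selected leaf indices are illustrative only):

.vb
   PetscSF        leafsf;
   const PetscInt selected[] = {1, 3}; // local leaf indices to keep

   PetscCall(PetscSFCreateEmbeddedLeafSF(sf, 2, selected, &leafsf));
   // leafsf keeps only the edges ending at the selected leaves; leaf indices are not remapped
   PetscCall(PetscSFDestroy(&leafsf));
.ve
*/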
1448: /*@C
1449: PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()`
1451: Collective
1453: Input Parameters:
1454: + sf - star forest on which to communicate
1455: . unit - data type associated with each node
1456: . rootdata - buffer to broadcast
1457: - op - operation to use for reduction
1459: Output Parameter:
1460: . leafdata - buffer to be reduced with values from each leaf's respective root
1462: Level: intermediate
1464: Note:
1465: When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1466:   are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1467: use `PetscSFBcastWithMemTypeBegin()` instead.
1469: .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1470: @*/
1471: PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1472: {
1473: PetscMemType rootmtype, leafmtype;
1475: PetscFunctionBegin;
1477: PetscCall(PetscSFSetUp(sf));
1478: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1479: PetscCall(PetscGetMemType(rootdata, &rootmtype));
1480: PetscCall(PetscGetMemType(leafdata, &leafmtype));
1481: PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1482: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1483: PetscFunctionReturn(PETSC_SUCCESS);
1484: }
1486: /*@C
1487: PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call
1488: to `PetscSFBcastEnd()`
1490: Collective
1492: Input Parameters:
1493: + sf - star forest on which to communicate
1494: . unit - data type associated with each node
1495: . rootmtype - memory type of rootdata
1496: . rootdata - buffer to broadcast
1497: . leafmtype - memory type of leafdata
1498: - op - operation to use for reduction
1500: Output Parameter:
1501: . leafdata - buffer to be reduced with values from each leaf's respective root
1503: Level: intermediate
1505: .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1506: @*/
1507: PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1508: {
1509: PetscFunctionBegin;
1511: PetscCall(PetscSFSetUp(sf));
1512: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1513: PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1514: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1515: PetscFunctionReturn(PETSC_SUCCESS);
1516: }
1518: /*@C
1519: PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`
1521: Collective
1523: Input Parameters:
1524: + sf - star forest
1525: . unit - data type
1526: . rootdata - buffer to broadcast
1527: - op - operation to use for reduction
1529: Output Parameter:
1530: . leafdata - buffer to be reduced with values from each leaf's respective root
1532: Level: intermediate
1534: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
1535: @*/
1536: PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1537: {
1538: PetscFunctionBegin;
1540: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
1541: PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
1542: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
1543: PetscFunctionReturn(PETSC_SUCCESS);
1544: }
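/*
   Example usage (a minimal sketch, assuming a set-up PetscSF 'sf' with nroots roots and
   nleaves contiguous leaves, i.e. ilocal == NULL; the buffer names are illustrative only):

.vb
   PetscInt *rootdata, *leafdata;

   PetscCall(PetscMalloc2(nroots, &rootdata, nleaves, &leafdata));
   for (PetscInt i = 0; i < nroots; i++) rootdata[i] = i;
   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
   // independent work may be overlapped with the communication here
   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
   // each leaf now holds a copy of its root's value
   PetscCall(PetscFree2(rootdata, leafdata));
.ve
*/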
1546: /*@C
1547: PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()`
1549: Collective
1551: Input Parameters:
1552: + sf - star forest
1553: . unit - data type
1554: . leafdata - values to reduce
1555: - op - reduction operation
1557: Output Parameter:
1558: . rootdata - result of reduction of values from all leaves of each root
1560: Level: intermediate
1562: Note:
1563: When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1564:   are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1565: use `PetscSFReduceWithMemTypeBegin()` instead.
1567: .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`
1568: @*/
1569: PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1570: {
1571: PetscMemType rootmtype, leafmtype;
1573: PetscFunctionBegin;
1575: PetscCall(PetscSFSetUp(sf));
1576: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1577: PetscCall(PetscGetMemType(rootdata, &rootmtype));
1578: PetscCall(PetscGetMemType(leafdata, &leafmtype));
1579: PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1580: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1581: PetscFunctionReturn(PETSC_SUCCESS);
1582: }
1584: /*@C
1585: PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()`
1587: Collective
1589: Input Parameters:
1590: + sf - star forest
1591: . unit - data type
1592: . leafmtype - memory type of leafdata
1593: . leafdata - values to reduce
1594: . rootmtype - memory type of rootdata
1595: - op - reduction operation
1597: Output Parameter:
1598: . rootdata - result of reduction of values from all leaves of each root
1600: Level: intermediate
1602: .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
1603: @*/
1604: PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1605: {
1606: PetscFunctionBegin;
1608: PetscCall(PetscSFSetUp(sf));
1609: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1610: PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1611: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1612: PetscFunctionReturn(PETSC_SUCCESS);
1613: }
1615: /*@C
1616: PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`
1618: Collective
1620: Input Parameters:
1621: + sf - star forest
1622: . unit - data type
1623: . leafdata - values to reduce
1624: - op - reduction operation
1626: Output Parameter:
1627: . rootdata - result of reduction of values from all leaves of each root
1629: Level: intermediate
1631: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
1632: @*/
1633: PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1634: {
1635: PetscFunctionBegin;
1637: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1638: PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
1639: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1640: PetscFunctionReturn(PETSC_SUCCESS);
1641: }
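/*
   Example usage (a minimal sketch, assuming a set-up PetscSF 'sf' with nroots roots and
   nleaves contiguous leaves; buffer names are illustrative only):

.vb
   PetscInt *rootdata, *leafdata;

   PetscCall(PetscCalloc1(nroots, &rootdata)); // MPI_SUM accumulates into rootdata, so zero it first
   PetscCall(PetscMalloc1(nleaves, &leafdata));
   for (PetscInt i = 0; i < nleaves; i++) leafdata[i] = 1;
   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, leafdata, rootdata, MPI_SUM));
   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, leafdata, rootdata, MPI_SUM));
   // rootdata[r] now holds the number of leaves connected to root r
   PetscCall(PetscFree(leafdata));
   PetscCall(PetscFree(rootdata));
.ve
*/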
1643: /*@C
1644: PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value,
1645: to be completed with `PetscSFFetchAndOpEnd()`
1647: Collective
1649: Input Parameters:
1650: + sf - star forest
1651: . unit - data type
1652: . leafdata - leaf values to use in reduction
1653: - op - operation to use for reduction
1655: Output Parameters:
1656: + rootdata - root values to be updated, input state is seen by first process to perform an update
1657: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1659: Level: advanced
1661: Note:
1662: The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1663: might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1664:   not guaranteed across the whole vertex. Therefore, this function is typically used only with primitive types such as
1665: integers.
1667: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1668: @*/
1669: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1670: {
1671: PetscMemType rootmtype, leafmtype, leafupdatemtype;
1673: PetscFunctionBegin;
1675: PetscCall(PetscSFSetUp(sf));
1676: PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1677: PetscCall(PetscGetMemType(rootdata, &rootmtype));
1678: PetscCall(PetscGetMemType(leafdata, &leafmtype));
1679: PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
1680: PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1681: PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1682: PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1683: PetscFunctionReturn(PETSC_SUCCESS);
1684: }
1686: /*@C
1687: PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
1688: applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()`
1690: Collective
1692: Input Parameters:
1693: + sf - star forest
1694: . unit - data type
1695: . rootmtype - memory type of rootdata
1696: . leafmtype - memory type of leafdata
1697: . leafdata - leaf values to use in reduction
1698: . leafupdatemtype - memory type of leafupdate
1699: - op - operation to use for reduction
1701: Output Parameters:
1702: + rootdata - root values to be updated, input state is seen by first process to perform an update
1703: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1705: Level: advanced
1707: Note:
1708: See `PetscSFFetchAndOpBegin()` for more details.
1710: .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
1711: @*/
1712: PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1713: {
1714: PetscFunctionBegin;
1716: PetscCall(PetscSFSetUp(sf));
1717: PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1718: PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1719: PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1720: PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1721: PetscFunctionReturn(PETSC_SUCCESS);
1722: }
1724: /*@C
1725: PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
1726: to fetch values from roots and update atomically by applying operation using my leaf value
1728: Collective
1730: Input Parameters:
1731: + sf - star forest
1732: . unit - data type
1733: . leafdata - leaf values to use in reduction
1734: - op - operation to use for reduction
1736: Output Parameters:
1737: + rootdata - root values to be updated, input state is seen by first process to perform an update
1738: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1740: Level: advanced
1742: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
1743: @*/
1744: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1745: {
1746: PetscFunctionBegin;
1748: PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1749: PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
1750: PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1751: PetscFunctionReturn(PETSC_SUCCESS);
1752: }
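/*
   Example usage (a minimal sketch, assuming a set-up PetscSF 'sf' with nroots roots and
   nleaves contiguous leaves; a common pattern for handing out unique offsets):

.vb
   PetscInt *rootdata, *leafdata, *leafupdate;

   PetscCall(PetscCalloc1(nroots, &rootdata));
   PetscCall(PetscMalloc2(nleaves, &leafdata, nleaves, &leafupdate));
   for (PetscInt i = 0; i < nleaves; i++) leafdata[i] = 1;
   PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM));
   PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM));
   // leafupdate[i] is the root value seen immediately before leaf i's addition
   PetscCall(PetscFree2(leafdata, leafupdate));
   PetscCall(PetscFree(rootdata));
.ve
*/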
1754: /*@C
1755: PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()`
1757: Collective
1759: Input Parameter:
1760: . sf - star forest
1762: Output Parameter:
1763: . degree - degree of each root vertex
1765: Level: advanced
1767: Note:
1768: The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1770: .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
1771: @*/
1772: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree)
1773: {
1774: PetscFunctionBegin;
1776: PetscSFCheckGraphSet(sf, 1);
1777: PetscAssertPointer(degree, 2);
1778: if (!sf->degreeknown) {
1779: PetscInt i, nroots = sf->nroots, maxlocal;
1780: PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1781: maxlocal = sf->maxleaf - sf->minleaf + 1;
1782: PetscCall(PetscMalloc1(nroots, &sf->degree));
1783: PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1784: for (i = 0; i < nroots; i++) sf->degree[i] = 0;
1785: for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
1786: PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1787: }
1788: *degree = NULL;
1789: PetscFunctionReturn(PETSC_SUCCESS);
1790: }
1792: /*@C
1793: PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`
1795: Collective
1797: Input Parameter:
1798: . sf - star forest
1800: Output Parameter:
1801: . degree - degree of each root vertex
1803: Level: developer
1805: Note:
1806: The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1808: .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
1809: @*/
1810: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree)
1811: {
1812: PetscFunctionBegin;
1814: PetscSFCheckGraphSet(sf, 1);
1815: PetscAssertPointer(degree, 2);
1816: if (!sf->degreeknown) {
1817: PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1818: PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1819: PetscCall(PetscFree(sf->degreetmp));
1820: sf->degreeknown = PETSC_TRUE;
1821: }
1822: *degree = sf->degree;
1823: PetscFunctionReturn(PETSC_SUCCESS);
1824: }
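/*
   Example usage (a minimal sketch, assuming a set-up PetscSF 'sf'):

.vb
   const PetscInt *degree;

   PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
   PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
   // degree[r] is the number of leaves connected to root r;
   // the array is owned by sf, so do not PetscFree() it
.ve
*/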
1826: /*@C
1827: PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
1828:   Each multi-root is assigned the index of the corresponding original root.
1830: Collective
1832: Input Parameters:
1833: + sf - star forest
1834: - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`
1836: Output Parameters:
1837: + nMultiRoots - (optional) number of multi-roots (roots of multi-`PetscSF`)
1838: - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`
1840: Level: developer
1842: Note:
1843: The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed.
1845: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1846: @*/
1847: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1848: {
1849: PetscSF msf;
1850: PetscInt i, j, k, nroots, nmroots;
1852: PetscFunctionBegin;
1854: PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
1855: if (nroots) PetscAssertPointer(degree, 2);
1856: if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
1857: PetscAssertPointer(multiRootsOrigNumbering, 4);
1858: PetscCall(PetscSFGetMultiSF(sf, &msf));
1859: PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
1860: PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1861: for (i = 0, j = 0, k = 0; i < nroots; i++) {
1862: if (!degree[i]) continue;
1863: for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1864: }
1865: PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail");
1866: if (nMultiRoots) *nMultiRoots = nmroots;
1867: PetscFunctionReturn(PETSC_SUCCESS);
1868: }
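/*
   Example usage (a minimal sketch, building on the degree computation above; the output
   array is owned by the caller):

.vb
   const PetscInt *degree;
   PetscInt        nmroots, *morig;

   PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
   PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
   PetscCall(PetscSFComputeMultiRootOriginalNumbering(sf, degree, &nmroots, &morig));
   // morig[m] is the original root index of multi-root m, for m = 0, ..., nmroots-1
   PetscCall(PetscFree(morig));
.ve
*/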
1870: /*@C
1871: PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`
1873: Collective
1875: Input Parameters:
1876: + sf - star forest
1877: . unit - data type
1878: - leafdata - leaf data to gather to roots
1880: Output Parameter:
1881: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1883: Level: intermediate
1885: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1886: @*/
1887: PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1888: {
1889: PetscSF multi = NULL;
1891: PetscFunctionBegin;
1893: PetscCall(PetscSFSetUp(sf));
1894: PetscCall(PetscSFGetMultiSF(sf, &multi));
1895: PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1896: PetscFunctionReturn(PETSC_SUCCESS);
1897: }
1899: /*@C
1900: PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`
1902: Collective
1904: Input Parameters:
1905: + sf - star forest
1906: . unit - data type
1907: - leafdata - leaf data to gather to roots
1909: Output Parameter:
1910: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1912: Level: intermediate
1914: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1915: @*/
1916: PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1917: {
1918: PetscSF multi = NULL;
1920: PetscFunctionBegin;
1922: PetscCall(PetscSFGetMultiSF(sf, &multi));
1923: PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1924: PetscFunctionReturn(PETSC_SUCCESS);
1925: }
1927: /*@C
1928: PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`
1930: Collective
1932: Input Parameters:
1933: + sf - star forest
1934: . unit - data type
1935: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1937: Output Parameter:
1938: . leafdata - leaf data to be updated with personal data from each respective root
1940: Level: intermediate
1942: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
1943: @*/
1944: PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1945: {
1946: PetscSF multi = NULL;
1948: PetscFunctionBegin;
1950: PetscCall(PetscSFSetUp(sf));
1951: PetscCall(PetscSFGetMultiSF(sf, &multi));
1952: PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1953: PetscFunctionReturn(PETSC_SUCCESS);
1954: }
1956: /*@C
1957: PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`
1959: Collective
1961: Input Parameters:
1962: + sf - star forest
1963: . unit - data type
1964: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1966: Output Parameter:
1967: . leafdata - leaf data to be updated with personal data from each respective root
1969: Level: intermediate
1971: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
1972: @*/
1973: PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1974: {
1975: PetscSF multi = NULL;
1977: PetscFunctionBegin;
1979: PetscCall(PetscSFGetMultiSF(sf, &multi));
1980: PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1981: PetscFunctionReturn(PETSC_SUCCESS);
1982: }
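/*
   Example usage (a minimal sketch, assuming a set-up PetscSF 'sf' with nroots roots and
   nleaves contiguous leaves; the multi-root buffer has one slot per incoming leaf, i.e.
   its length is the sum of the root degrees):

.vb
   const PetscInt *degree;
   PetscInt        ndegree = 0, *leafdata, *multirootdata;

   PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
   PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
   for (PetscInt r = 0; r < nroots; r++) ndegree += degree[r];
   PetscCall(PetscMalloc2(nleaves, &leafdata, ndegree, &multirootdata));
   for (PetscInt i = 0; i < nleaves; i++) leafdata[i] = i;
   PetscCall(PetscSFGatherBegin(sf, MPIU_INT, leafdata, multirootdata));
   PetscCall(PetscSFGatherEnd(sf, MPIU_INT, leafdata, multirootdata));
   // ... process multirootdata (one entry per leaf of each root) ...
   PetscCall(PetscSFScatterBegin(sf, MPIU_INT, multirootdata, leafdata));
   PetscCall(PetscSFScatterEnd(sf, MPIU_INT, multirootdata, leafdata));
   PetscCall(PetscFree2(leafdata, multirootdata));
.ve
*/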
1984: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1985: {
1986: PetscInt i, n, nleaves;
1987: const PetscInt *ilocal = NULL;
1988: PetscHSetI seen;
1990: PetscFunctionBegin;
1991: if (PetscDefined(USE_DEBUG)) {
1992: PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
1993: PetscCall(PetscHSetICreate(&seen));
1994: for (i = 0; i < nleaves; i++) {
1995: const PetscInt leaf = ilocal ? ilocal[i] : i;
1996: PetscCall(PetscHSetIAdd(seen, leaf));
1997: }
1998: PetscCall(PetscHSetIGetSize(seen, &n));
1999: PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
2000: PetscCall(PetscHSetIDestroy(&seen));
2001: }
2002: PetscFunctionReturn(PETSC_SUCCESS);
2003: }
2005: /*@
2006: PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view
2008: Input Parameters:
2009: + sfA - The first `PetscSF`
2010: - sfB - The second `PetscSF`
2012: Output Parameter:
2013: . sfBA - The composite `PetscSF`
2015: Level: developer
2017: Notes:
2018: Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2019: forests, i.e. the same leaf is not connected with different roots.
2021:   `sfA`'s leaf space and `sfB`'s root space may partially overlap. The composition builds
2022: a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
2023: nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
2024: `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes.
2026: .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2027: @*/
2028: PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2029: {
2030: const PetscSFNode *remotePointsA, *remotePointsB;
2031: PetscSFNode *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
2032: const PetscInt *localPointsA, *localPointsB;
2033: PetscInt *localPointsBA;
2034: PetscInt i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
2035: PetscBool denseB;
2037: PetscFunctionBegin;
2039: PetscSFCheckGraphSet(sfA, 1);
2041: PetscSFCheckGraphSet(sfB, 2);
2042: PetscCheckSameComm(sfA, 1, sfB, 2);
2043: PetscAssertPointer(sfBA, 3);
2044: PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2045: PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
2047: PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2048: PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2049: /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
2050: numRootsB; otherwise, garbage will be broadcasted.
2051: Example (comm size = 1):
2052: sfA: 0 <- (0, 0)
2053: sfB: 100 <- (0, 0)
2054: 101 <- (0, 1)
2055:      Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target
2056:      of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would
2057:      receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
2058:      remotePointsA; if not recast, point 101 would receive a garbage value. */
2059: PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
2060: for (i = 0; i < numRootsB; i++) {
2061: reorderedRemotePointsA[i].rank = -1;
2062: reorderedRemotePointsA[i].index = -1;
2063: }
2064: for (i = 0; i < numLeavesA; i++) {
2065: PetscInt localp = localPointsA ? localPointsA[i] : i;
2067: if (localp >= numRootsB) continue;
2068: reorderedRemotePointsA[localp] = remotePointsA[i];
2069: }
2070: remotePointsA = reorderedRemotePointsA;
2071: PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2072: PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
2073: for (i = 0; i < maxleaf - minleaf + 1; i++) {
2074: leafdataB[i].rank = -1;
2075: leafdataB[i].index = -1;
2076: }
2077: PetscCall(PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
2078: PetscCall(PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
2079: PetscCall(PetscFree(reorderedRemotePointsA));
2081: denseB = (PetscBool)!localPointsB;
2082: for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2083: if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
2084: else numLeavesBA++;
2085: }
2086: if (denseB) {
2087: localPointsBA = NULL;
2088: remotePointsBA = leafdataB;
2089: } else {
2090: PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
2091: PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
2092: for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2093: const PetscInt l = localPointsB ? localPointsB[i] : i;
2095: if (leafdataB[l - minleaf].rank == -1) continue;
2096: remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
2097: localPointsBA[numLeavesBA] = l;
2098: numLeavesBA++;
2099: }
2100: PetscCall(PetscFree(leafdataB));
2101: }
2102: PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2103: PetscCall(PetscSFSetFromOptions(*sfBA));
2104: PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2105: PetscFunctionReturn(PETSC_SUCCESS);
2106: }
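/*
   Example usage (a minimal sketch, assuming two PetscSFs 'sfA' and 'sfB' with their graphs
   set on congruent communicators):

.vb
   PetscSF sfBA;

   PetscCall(PetscSFCompose(sfA, sfB, &sfBA));
   // a Bcast on sfBA is equivalent to a Bcast on sfA followed by a Bcast on sfB,
   // restricted to the connected nodes
   PetscCall(PetscSFDestroy(&sfBA));
.ve
*/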
2108: /*@
2109: PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one
2111: Input Parameters:
2112: + sfA - The first `PetscSF`
2113: - sfB - The second `PetscSF`
2115: Output Parameter:
2116: . sfBA - The composite `PetscSF`.
2118: Level: developer
2120: Notes:
2121: Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2122:   forests, i.e. the same leaf is not connected with different roots. Moreover, all roots of the
2123: second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected.
2125:   `sfA`'s leaf space and `sfB`'s leaf space may partially overlap. The composition builds
2126: a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
2127: roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()`
2128: on `sfA`, then
2129: a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots.
2131: .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
2132: @*/
2133: PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2134: {
2135: const PetscSFNode *remotePointsA, *remotePointsB;
2136: PetscSFNode *remotePointsBA;
2137: const PetscInt *localPointsA, *localPointsB;
2138: PetscSFNode *reorderedRemotePointsA = NULL;
2139: PetscInt i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
2140: MPI_Op op;
2141: #if defined(PETSC_USE_64BIT_INDICES)
2142: PetscBool iswin;
2143: #endif
2145: PetscFunctionBegin;
2147: PetscSFCheckGraphSet(sfA, 1);
2149: PetscSFCheckGraphSet(sfB, 2);
2150: PetscCheckSameComm(sfA, 1, sfB, 2);
2151: PetscAssertPointer(sfBA, 3);
2152: PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2153: PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
2155: PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2156: PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2158: /* TODO: Check roots of sfB have degree of 1 */
2159: /* Once we implement it, we can replace the MPI_MAXLOC
2160: with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
2161: We use MPI_MAXLOC only to have a deterministic output from this routine if
2162:      the root condition is not met.
2163: */
2164: op = MPI_MAXLOC;
2165: #if defined(PETSC_USE_64BIT_INDICES)
2166: /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2167: PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
2168: if (iswin) op = MPI_REPLACE;
2169: #endif
2171: PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2172: PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
2173: for (i = 0; i < maxleaf - minleaf + 1; i++) {
2174: reorderedRemotePointsA[i].rank = -1;
2175: reorderedRemotePointsA[i].index = -1;
2176: }
2177: if (localPointsA) {
2178: for (i = 0; i < numLeavesA; i++) {
2179: if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2180: reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2181: }
2182: } else {
2183: for (i = 0; i < numLeavesA; i++) {
2184: if (i > maxleaf || i < minleaf) continue;
2185: reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2186: }
2187: }
2189: PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
2190: PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
2191: for (i = 0; i < numRootsB; i++) {
2192: remotePointsBA[i].rank = -1;
2193: remotePointsBA[i].index = -1;
2194: }
2196: PetscCall(PetscSFReduceBegin(sfB, MPIU_2INT, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
2197: PetscCall(PetscSFReduceEnd(sfB, MPIU_2INT, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
2198: PetscCall(PetscFree(reorderedRemotePointsA));
2199: for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
2200: if (remotePointsBA[i].rank == -1) continue;
2201: remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
2202: remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2203: localPointsBA[numLeavesBA] = i;
2204: numLeavesBA++;
2205: }
2206: PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2207: PetscCall(PetscSFSetFromOptions(*sfBA));
2208: PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2209: PetscFunctionReturn(PETSC_SUCCESS);
2210: }
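/*
   Example usage (a minimal sketch, assuming 'sfA' and 'sfB' satisfy the requirements above,
   in particular that every root of sfB has degree one):

.vb
   PetscSF sfBA;

   PetscCall(PetscSFComposeInverse(sfA, sfB, &sfBA));
   // a Bcast on sfBA acts like a Bcast on sfA followed by a Reduce on sfB
   PetscCall(PetscSFDestroy(&sfBA));
.ve
*/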
2212: /*
2213: PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`
2215: Input Parameter:
2216: . sf - The global `PetscSF`
2218: Output Parameter:
2219: . out - The local `PetscSF`
2221: .seealso: `PetscSF`, `PetscSFCreate()`
2222: */
2223: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2224: {
2225: MPI_Comm comm;
2226: PetscMPIInt myrank;
2227: const PetscInt *ilocal;
2228: const PetscSFNode *iremote;
2229: PetscInt i, j, nroots, nleaves, lnleaves, *lilocal;
2230: PetscSFNode *liremote;
2231: PetscSF lsf;
2233: PetscFunctionBegin;
2235: if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2236: else {
2237: /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2238: PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
2239: PetscCallMPI(MPI_Comm_rank(comm, &myrank));
2241: /* Find out local edges and build a local SF */
2242: PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
2243: for (i = lnleaves = 0; i < nleaves; i++) {
2244: if (iremote[i].rank == (PetscInt)myrank) lnleaves++;
2245: }
2246: PetscCall(PetscMalloc1(lnleaves, &lilocal));
2247: PetscCall(PetscMalloc1(lnleaves, &liremote));
2249: for (i = j = 0; i < nleaves; i++) {
2250: if (iremote[i].rank == (PetscInt)myrank) {
2251: lilocal[j] = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2252: liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
2253: liremote[j].index = iremote[i].index;
2254: j++;
2255: }
2256: }
2257: PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
2258: PetscCall(PetscSFSetFromOptions(lsf));
2259: PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
2260: PetscCall(PetscSFSetUp(lsf));
2261: *out = lsf;
2262: }
2263: PetscFunctionReturn(PETSC_SUCCESS);
2264: }
2266: /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2267: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2268: {
2269: PetscMemType rootmtype, leafmtype;
2271: PetscFunctionBegin;
2273: PetscCall(PetscSFSetUp(sf));
2274: PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
2275: PetscCall(PetscGetMemType(rootdata, &rootmtype));
2276: PetscCall(PetscGetMemType(leafdata, &leafmtype));
2277: PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
2278: PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
2279: PetscFunctionReturn(PETSC_SUCCESS);
2280: }
2282: /*@
2283: PetscSFConcatenate - concatenate multiple `PetscSF` into one
2285: Input Parameters:
2286: + comm - the communicator
2287: . nsfs - the number of input `PetscSF`
2288: . sfs - the array of input `PetscSF`
2289: . rootMode - the root mode specifying how roots are handled
2290: - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage
2292: Output Parameter:
2293: . newsf - The resulting `PetscSF`
2295: Level: advanced
2297: Notes:
2298:   The communicator of all `PetscSF`s in `sfs` must be `comm`.
2300: Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.
2302: The offsets in `leafOffsets` are added to the original leaf indices.
2304: If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well.
2305: In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`.
2307: If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s.
2308:   In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2310: All root modes retain the essential connectivity condition.
2311: If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`.
2312: Parameter `rootMode` controls how the input root spaces are combined.
2313: For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode)
2314: and is also the same in the output `PetscSF`.
2315: For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
2316: `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
2317: roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously.
2318: `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
2319: roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously;
2320: the original root ranks are ignored.
2321: For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
2322: the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each rank
2323: to keep the load balancing.
2324: However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks.
2326: Example:
2327: We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running
2328: .vb
2329: make -C $PETSC_DIR/src/vec/is/sf/tests ex18
2330: for m in {local,global,shared}; do
2331: mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view
2332: done
2333: .ve
2334: we generate two identical `PetscSF`s sf_0 and sf_1,
2335: .vb
2336: PetscSF Object: sf_0 2 MPI processes
2337: type: basic
2338: rank #leaves #roots
2339: [ 0] 4 2
2340: [ 1] 4 2
2341: leaves roots roots in global numbering
2342: ( 0, 0) <- ( 0, 0) = 0
2343: ( 0, 1) <- ( 0, 1) = 1
2344: ( 0, 2) <- ( 1, 0) = 2
2345: ( 0, 3) <- ( 1, 1) = 3
2346: ( 1, 0) <- ( 0, 0) = 0
2347: ( 1, 1) <- ( 0, 1) = 1
2348: ( 1, 2) <- ( 1, 0) = 2
2349: ( 1, 3) <- ( 1, 1) = 3
2350: .ve
2351: and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\:
2352: .vb
2353: rootMode = local:
2354: PetscSF Object: result_sf 2 MPI processes
2355: type: basic
2356: rank #leaves #roots
2357: [ 0] 8 4
2358: [ 1] 8 4
2359: leaves roots roots in global numbering
2360: ( 0, 0) <- ( 0, 0) = 0
2361: ( 0, 1) <- ( 0, 1) = 1
2362: ( 0, 2) <- ( 1, 0) = 4
2363: ( 0, 3) <- ( 1, 1) = 5
2364: ( 0, 4) <- ( 0, 2) = 2
2365: ( 0, 5) <- ( 0, 3) = 3
2366: ( 0, 6) <- ( 1, 2) = 6
2367: ( 0, 7) <- ( 1, 3) = 7
2368: ( 1, 0) <- ( 0, 0) = 0
2369: ( 1, 1) <- ( 0, 1) = 1
2370: ( 1, 2) <- ( 1, 0) = 4
2371: ( 1, 3) <- ( 1, 1) = 5
2372: ( 1, 4) <- ( 0, 2) = 2
2373: ( 1, 5) <- ( 0, 3) = 3
2374: ( 1, 6) <- ( 1, 2) = 6
2375: ( 1, 7) <- ( 1, 3) = 7
2377: rootMode = global:
2378: PetscSF Object: result_sf 2 MPI processes
2379: type: basic
2380: rank #leaves #roots
2381: [ 0] 8 4
2382: [ 1] 8 4
2383: leaves roots roots in global numbering
2384: ( 0, 0) <- ( 0, 0) = 0
2385: ( 0, 1) <- ( 0, 1) = 1
2386: ( 0, 2) <- ( 0, 2) = 2
2387: ( 0, 3) <- ( 0, 3) = 3
2388: ( 0, 4) <- ( 1, 0) = 4
2389: ( 0, 5) <- ( 1, 1) = 5
2390: ( 0, 6) <- ( 1, 2) = 6
2391: ( 0, 7) <- ( 1, 3) = 7
2392: ( 1, 0) <- ( 0, 0) = 0
2393: ( 1, 1) <- ( 0, 1) = 1
2394: ( 1, 2) <- ( 0, 2) = 2
2395: ( 1, 3) <- ( 0, 3) = 3
2396: ( 1, 4) <- ( 1, 0) = 4
2397: ( 1, 5) <- ( 1, 1) = 5
2398: ( 1, 6) <- ( 1, 2) = 6
2399: ( 1, 7) <- ( 1, 3) = 7
2401: rootMode = shared:
2402: PetscSF Object: result_sf 2 MPI processes
2403: type: basic
2404: rank #leaves #roots
2405: [ 0] 8 2
2406: [ 1] 8 2
2407: leaves roots roots in global numbering
2408: ( 0, 0) <- ( 0, 0) = 0
2409: ( 0, 1) <- ( 0, 1) = 1
2410: ( 0, 2) <- ( 1, 0) = 2
2411: ( 0, 3) <- ( 1, 1) = 3
2412: ( 0, 4) <- ( 0, 0) = 0
2413: ( 0, 5) <- ( 0, 1) = 1
2414: ( 0, 6) <- ( 1, 0) = 2
2415: ( 0, 7) <- ( 1, 1) = 3
2416: ( 1, 0) <- ( 0, 0) = 0
2417: ( 1, 1) <- ( 0, 1) = 1
2418: ( 1, 2) <- ( 1, 0) = 2
2419: ( 1, 3) <- ( 1, 1) = 3
2420: ( 1, 4) <- ( 0, 0) = 0
2421: ( 1, 5) <- ( 0, 1) = 1
2422: ( 1, 6) <- ( 1, 0) = 2
2423: ( 1, 7) <- ( 1, 1) = 3
2424: .ve
2426: .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode`
2427: @*/
2428: PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf)
2429: {
2430: PetscInt i, s, nLeaves, nRoots;
2431: PetscInt *leafArrayOffsets;
2432: PetscInt *ilocal_new;
2433: PetscSFNode *iremote_new;
2434: PetscBool all_ilocal_null = PETSC_FALSE;
2435: PetscLayout glayout = NULL;
2436: PetscInt *gremote = NULL;
2437: PetscMPIInt rank, size;
2439: PetscFunctionBegin;
2440: if (PetscDefined(USE_DEBUG)) {
2441: PetscSF dummy; /* just to have a PetscObject on comm for input validation */
2443: PetscCall(PetscSFCreate(comm, &dummy));
2445: PetscAssertPointer(sfs, 3);
2446: for (i = 0; i < nsfs; i++) {
2448: PetscCheckSameComm(dummy, 1, sfs[i], 3);
2449: }
2451: if (leafOffsets) PetscAssertPointer(leafOffsets, 5);
2452: PetscAssertPointer(newsf, 6);
2453: PetscCall(PetscSFDestroy(&dummy));
2454: }
2455: if (!nsfs) {
2456: PetscCall(PetscSFCreate(comm, newsf));
2457: PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
2458: PetscFunctionReturn(PETSC_SUCCESS);
2459: }
2460: PetscCallMPI(MPI_Comm_rank(comm, &rank));
2461: PetscCallMPI(MPI_Comm_size(comm, &size));
2463: /* Calculate leaf array offsets */
2464: PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
2465: leafArrayOffsets[0] = 0;
2466: for (s = 0; s < nsfs; s++) {
2467: PetscInt nl;
2469: PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
2470: leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2471: }
2472: nLeaves = leafArrayOffsets[nsfs];
2474: /* Calculate number of roots */
2475: switch (rootMode) {
2476: case PETSCSF_CONCATENATE_ROOTMODE_SHARED: {
2477: PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
2478: if (PetscDefined(USE_DEBUG)) {
2479: for (s = 1; s < nsfs; s++) {
2480: PetscInt nr;
2482: PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
2483: PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots);
2484: }
2485: }
2486: } break;
2487: case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: {
2488: /* Calculate also global layout in this case */
2489: PetscInt *nls;
2490: PetscLayout *lts;
2491: PetscInt **inds;
2492: PetscInt j;
2493: PetscInt rootOffset = 0;
2495: PetscCall(PetscCalloc3(nsfs, <s, nsfs, &nls, nsfs, &inds));
2496: PetscCall(PetscLayoutCreate(comm, &glayout));
2497: glayout->bs = 1;
2498: glayout->n = 0;
2499: glayout->N = 0;
2500: for (s = 0; s < nsfs; s++) {
2501: PetscCall(PetscSFGetGraphLayout(sfs[s], <s[s], &nls[s], NULL, &inds[s]));
2502: glayout->n += lts[s]->n;
2503: glayout->N += lts[s]->N;
2504: }
2505: PetscCall(PetscLayoutSetUp(glayout));
2506: PetscCall(PetscMalloc1(nLeaves, &gremote));
2507: for (s = 0, j = 0; s < nsfs; s++) {
2508: for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset;
2509: rootOffset += lts[s]->N;
2510: PetscCall(PetscLayoutDestroy(<s[s]));
2511: PetscCall(PetscFree(inds[s]));
2512: }
2513: PetscCall(PetscFree3(lts, nls, inds));
2514: nRoots = glayout->N;
2515: } break;
2516: case PETSCSF_CONCATENATE_ROOTMODE_LOCAL:
2517: /* nRoots calculated later in this case */
2518: break;
2519: default:
2520: SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode);
2521: }
2523: if (!leafOffsets) {
2524: all_ilocal_null = PETSC_TRUE;
2525: for (s = 0; s < nsfs; s++) {
2526: const PetscInt *ilocal;
2528: PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
2529: if (ilocal) {
2530: all_ilocal_null = PETSC_FALSE;
2531: break;
2532: }
2533: }
2534: PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
2535: }
2537: /* Renumber and concatenate local leaves */
2538: ilocal_new = NULL;
2539: if (!all_ilocal_null) {
2540: PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
2541: for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2542: for (s = 0; s < nsfs; s++) {
2543: const PetscInt *ilocal;
2544: PetscInt *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]);
2545: PetscInt i, nleaves_l;
2547: PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
2548: for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2549: }
2550: }
2552: /* Renumber and concatenate remote roots */
2553: if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) {
2554: PetscInt rootOffset = 0;
2556: PetscCall(PetscMalloc1(nLeaves, &iremote_new));
2557: for (i = 0; i < nLeaves; i++) {
2558: iremote_new[i].rank = -1;
2559: iremote_new[i].index = -1;
2560: }
2561: for (s = 0; s < nsfs; s++) {
2562: PetscInt i, nl, nr;
2563: PetscSF tmp_sf;
2564: const PetscSFNode *iremote;
2565: PetscSFNode *tmp_rootdata;
2566: PetscSFNode *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]);
2568: PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
2569: PetscCall(PetscSFCreate(comm, &tmp_sf));
2570: /* create helper SF with contiguous leaves */
2571: PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
2572: PetscCall(PetscSFSetUp(tmp_sf));
2573: PetscCall(PetscMalloc1(nr, &tmp_rootdata));
2574: if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) {
2575: for (i = 0; i < nr; i++) {
2576: tmp_rootdata[i].index = i + rootOffset;
2577: tmp_rootdata[i].rank = (PetscInt)rank;
2578: }
2579: rootOffset += nr;
2580: } else {
2581: for (i = 0; i < nr; i++) {
2582: tmp_rootdata[i].index = i;
2583: tmp_rootdata[i].rank = (PetscInt)rank;
2584: }
2585: }
2586: PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2587: PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2588: PetscCall(PetscSFDestroy(&tmp_sf));
2589: PetscCall(PetscFree(tmp_rootdata));
2590: }
2591: if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above
2593: /* Build the new SF */
2594: PetscCall(PetscSFCreate(comm, newsf));
2595: PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
2596: } else {
2597: /* Build the new SF */
2598: PetscCall(PetscSFCreate(comm, newsf));
2599: PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote));
2600: }
2601: PetscCall(PetscSFSetUp(*newsf));
2602: PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view"));
2603: PetscCall(PetscLayoutDestroy(&glayout));
2604: PetscCall(PetscFree(gremote));
2605: PetscCall(PetscFree(leafArrayOffsets));
2606: PetscFunctionReturn(PETSC_SUCCESS);
2607: }
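/*
   Example usage (a minimal sketch, assuming two set-up PetscSFs 'sf0' and 'sf1' on 'comm'
   that both use contiguous leaf storage, so leafOffsets may be passed as NULL; the root
   mode chosen here is illustrative):

.vb
   PetscSF sfs[2] = {sf0, sf1}, newsf;

   PetscCall(PetscSFConcatenate(comm, 2, sfs, PETSCSF_CONCATENATE_ROOTMODE_LOCAL, NULL, &newsf));
   PetscCall(PetscSFDestroy(&newsf));
.ve
*/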