Actual source code: sf.c
1: #include <petsc/private/sfimpl.h>
2: #include <petsc/private/hashseti.h>
3: #include <petsc/private/viewerimpl.h>
4: #include <petsc/private/hashmapi.h>
6: #if defined(PETSC_HAVE_CUDA)
7: #include <cuda_runtime.h>
8: #include <petscdevice_cuda.h>
9: #endif
11: #if defined(PETSC_HAVE_HIP)
12: #include <hip/hip_runtime.h>
13: #endif
15: #if defined(PETSC_CLANG_STATIC_ANALYZER)
16: extern void PetscSFCheckGraphSet(PetscSF, int);
17: #else
18: #if defined(PETSC_USE_DEBUG)
19: #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
20: #else
21: #define PetscSFCheckGraphSet(sf, arg) \
22: do { \
23: } while (0)
24: #endif
25: #endif
27: const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
28: const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};
30: /*@
31: PetscSFCreate - create a star forest communication context
33: Collective
35: Input Parameter:
36: . comm - communicator on which the star forest will operate
38: Output Parameter:
39: . sf - new star forest context
41: Options Database Key:
42: . -sf_type type - value of type may be
43: .vb
44: basic    - Use MPI persistent Isend/Irecv for communication (Default)
45: window   - Use MPI-3 one-sided window for communication
46: neighbor - Use MPI-3 neighborhood collectives for communication
47: .ve
49: Level: intermediate
51: Note:
52: When one knows the communication graph is one of the predefined graphs, such as `MPI_Alltoall()`, `MPI_Allgatherv()`,
53: `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
54: `SF`s are optimized and have better performance than general `SF`s.
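  Example Usage:
  A minimal sketch of the typical life cycle (illustrative only; the graph itself still has to be described):
.vb
  PetscSF sf;

  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  PetscCall(PetscSFSetFromOptions(sf));                 /* e.g. pick -sf_type basic|window|neighbor */
  /* describe the forest with PetscSFSetGraph() or PetscSFSetGraphWithPattern() */
  PetscCall(PetscSFSetUp(sf));
  /* communicate with PetscSFBcastBegin()/PetscSFBcastEnd(), PetscSFReduceBegin()/PetscSFReduceEnd(), ... */
  PetscCall(PetscSFDestroy(&sf));
.ve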
56: .seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
57: @*/
58: PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
59: {
60: PetscSF b;
62: PetscFunctionBegin;
63: PetscAssertPointer(sf, 2);
64: PetscCall(PetscSFInitializePackage());
66: PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
68: b->nroots = -1;
69: b->nleaves = -1;
70: b->minleaf = PETSC_MAX_INT;
71: b->maxleaf = PETSC_MIN_INT;
72: b->nranks = -1;
73: b->rankorder = PETSC_TRUE;
74: b->ingroup = MPI_GROUP_NULL;
75: b->outgroup = MPI_GROUP_NULL;
76: b->graphset = PETSC_FALSE;
77: #if defined(PETSC_HAVE_DEVICE)
78: b->use_gpu_aware_mpi = use_gpu_aware_mpi;
79: b->use_stream_aware_mpi = PETSC_FALSE;
80: b->unknown_input_stream = PETSC_FALSE;
81: #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
82: b->backend = PETSCSF_BACKEND_KOKKOS;
83: #elif defined(PETSC_HAVE_CUDA)
84: b->backend = PETSCSF_BACKEND_CUDA;
85: #elif defined(PETSC_HAVE_HIP)
86: b->backend = PETSCSF_BACKEND_HIP;
87: #endif
89: #if defined(PETSC_HAVE_NVSHMEM)
90: b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */
91: b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
92: PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
93: PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
94: #endif
95: #endif
96: b->vscat.from_n = -1;
97: b->vscat.to_n = -1;
98: b->vscat.unit = MPIU_SCALAR;
99: *sf = b;
100: PetscFunctionReturn(PETSC_SUCCESS);
101: }
103: /*@
104: PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
106: Collective
108: Input Parameter:
109: . sf - star forest
111: Level: advanced
113: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
114: @*/
115: PetscErrorCode PetscSFReset(PetscSF sf)
116: {
117: PetscFunctionBegin;
119: PetscTryTypeMethod(sf, Reset);
120: sf->nroots = -1;
121: sf->nleaves = -1;
122: sf->minleaf = PETSC_MAX_INT;
123: sf->maxleaf = PETSC_MIN_INT;
124: sf->mine = NULL;
125: sf->remote = NULL;
126: sf->graphset = PETSC_FALSE;
127: PetscCall(PetscFree(sf->mine_alloc));
128: PetscCall(PetscFree(sf->remote_alloc));
129: sf->nranks = -1;
130: PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
131: sf->degreeknown = PETSC_FALSE;
132: PetscCall(PetscFree(sf->degree));
133: if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
134: if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
135: if (sf->multi) sf->multi->multi = NULL;
136: PetscCall(PetscSFDestroy(&sf->multi));
137: PetscCall(PetscLayoutDestroy(&sf->map));
139: #if defined(PETSC_HAVE_DEVICE)
140: for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
141: #endif
143: sf->setupcalled = PETSC_FALSE;
144: PetscFunctionReturn(PETSC_SUCCESS);
145: }
147: /*@C
148: PetscSFSetType - Set the `PetscSF` communication implementation
150: Collective
152: Input Parameters:
153: + sf - the `PetscSF` context
154: - type - a known method
155: .vb
156: PETSCSFWINDOW - MPI-2/3 one-sided
157: PETSCSFBASIC - basic implementation using MPI-1 two-sided
158: .ve
160: Options Database Key:
161: . -sf_type <type> - Sets the method; for example `basic` or `window`. Use -help for a list of available methods
163: Level: intermediate
165: Notes:
166: See `PetscSFType` for possible values
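  Example Usage:
  A minimal sketch of selecting the one-sided implementation programmatically (equivalent to the option -sf_type window):
.vb
  PetscCall(PetscSFSetType(sf, PETSCSFWINDOW));
.ve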
168: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`
169: @*/
170: PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
171: {
172: PetscBool match;
173: PetscErrorCode (*r)(PetscSF);
175: PetscFunctionBegin;
177: PetscAssertPointer(type, 2);
179: PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
180: if (match) PetscFunctionReturn(PETSC_SUCCESS);
182: PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
183: PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
184: /* Destroy the previous PetscSF implementation context */
185: PetscTryTypeMethod(sf, Destroy);
186: PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
187: PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
188: PetscCall((*r)(sf));
189: PetscFunctionReturn(PETSC_SUCCESS);
190: }
192: /*@C
193: PetscSFGetType - Get the `PetscSF` communication implementation
195: Not Collective
197: Input Parameter:
198: . sf - the `PetscSF` context
200: Output Parameter:
201: . type - the `PetscSF` type name
203: Level: intermediate
205: .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
206: @*/
207: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
208: {
209: PetscFunctionBegin;
211: PetscAssertPointer(type, 2);
212: *type = ((PetscObject)sf)->type_name;
213: PetscFunctionReturn(PETSC_SUCCESS);
214: }
216: /*@C
217: PetscSFDestroy - destroy a star forest
219: Collective
221: Input Parameter:
222: . sf - address of star forest
224: Level: intermediate
226: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
227: @*/
228: PetscErrorCode PetscSFDestroy(PetscSF *sf)
229: {
230: PetscFunctionBegin;
231: if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
233: if (--((PetscObject)(*sf))->refct > 0) {
234: *sf = NULL;
235: PetscFunctionReturn(PETSC_SUCCESS);
236: }
237: PetscCall(PetscSFReset(*sf));
238: PetscTryTypeMethod((*sf), Destroy);
239: PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
240: if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
241: #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
242: if ((*sf)->use_stream_aware_mpi) {
243: PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream));
244: PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm));
245: }
246: #endif
247: PetscCall(PetscHeaderDestroy(sf));
248: PetscFunctionReturn(PETSC_SUCCESS);
249: }
251: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
252: {
253: PetscInt i, nleaves;
254: PetscMPIInt size;
255: const PetscInt *ilocal;
256: const PetscSFNode *iremote;
258: PetscFunctionBegin;
259: if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
260: PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
261: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
262: for (i = 0; i < nleaves; i++) {
263: const PetscInt rank = iremote[i].rank;
264: const PetscInt remote = iremote[i].index;
265: const PetscInt leaf = ilocal ? ilocal[i] : i;
266: PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
267: PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
268: PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
269: }
270: PetscFunctionReturn(PETSC_SUCCESS);
271: }
273: /*@
274: PetscSFSetUp - set up communication structures for a `PetscSF`; after this is done it may be used to perform communication
276: Collective
278: Input Parameter:
279: . sf - star forest communication object
281: Level: beginner
283: .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
284: @*/
285: PetscErrorCode PetscSFSetUp(PetscSF sf)
286: {
287: PetscFunctionBegin;
289: PetscSFCheckGraphSet(sf, 1);
290: if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS);
291: PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
292: PetscCall(PetscSFCheckGraphValid_Private(sf));
293: if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
294: PetscTryTypeMethod(sf, SetUp);
295: #if defined(PETSC_HAVE_CUDA)
296: if (sf->backend == PETSCSF_BACKEND_CUDA) {
297: sf->ops->Malloc = PetscSFMalloc_CUDA;
298: sf->ops->Free = PetscSFFree_CUDA;
299: }
300: #endif
301: #if defined(PETSC_HAVE_HIP)
302: if (sf->backend == PETSCSF_BACKEND_HIP) {
303: sf->ops->Malloc = PetscSFMalloc_HIP;
304: sf->ops->Free = PetscSFFree_HIP;
305: }
306: #endif
308: #if defined(PETSC_HAVE_KOKKOS)
309: if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
310: sf->ops->Malloc = PetscSFMalloc_Kokkos;
311: sf->ops->Free = PetscSFFree_Kokkos;
312: }
313: #endif
314: PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
315: sf->setupcalled = PETSC_TRUE;
316: PetscFunctionReturn(PETSC_SUCCESS);
317: }
319: /*@
320: PetscSFSetFromOptions - set `PetscSF` options using the options database
322: Logically Collective
324: Input Parameter:
325: . sf - star forest
327: Options Database Keys:
328: + -sf_type - implementation type, see `PetscSFSetType()`
329: . -sf_rank_order - sort composite points for gathers and scatters in rank order; gathers are non-deterministic otherwise
330: . -sf_unknown_input_stream - Assume the `PetscSF` root/leafdata is computed on arbitrary streams unknown to `PetscSF` (default: false).
332: If true, this option only works with `-use_gpu_aware_mpi 1`.
333: . -sf_use_stream_aware_mpi - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
334: If true, this option only works with `-use_gpu_aware_mpi 1`.
336: - -sf_backend cuda | hip | kokkos - Select the device backend SF uses. Currently `PetscSF` has these backends: cuda, hip, and kokkos.
337: On CUDA (HIP) devices, one can choose cuda (hip) or kokkos, with the default being kokkos. On other devices,
338: the only available backend is kokkos.
340: Level: intermediate
342: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
343: @*/
344: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
345: {
346: PetscSFType deft;
347: char type[256];
348: PetscBool flg;
350: PetscFunctionBegin;
352: PetscObjectOptionsBegin((PetscObject)sf);
353: deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
354: PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
355: PetscCall(PetscSFSetType(sf, flg ? type : deft));
356: PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
357: #if defined(PETSC_HAVE_DEVICE)
358: {
359: char backendstr[32] = {0};
360: PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
361: /* Change the defaults set in PetscSFCreate() with command line options */
362: PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
363: PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
364: PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
365: PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
366: PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
367: PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
368: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
369: if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
370: else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
371: else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
372: else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
373: #elif defined(PETSC_HAVE_KOKKOS)
374: PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
375: #endif
377: #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
378: if (sf->use_stream_aware_mpi) {
379: MPI_Info info;
381: PetscCallMPI(MPI_Info_create(&info));
382: PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
383: PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
384: PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
385: PetscCallMPI(MPI_Info_free(&info));
386: PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
387: }
388: #endif
389: }
390: #endif
391: PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
392: PetscOptionsEnd();
393: PetscFunctionReturn(PETSC_SUCCESS);
394: }
396: /*@
397: PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
399: Logically Collective
401: Input Parameters:
402: + sf - star forest
403: - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)
405: Level: advanced
407: .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
408: @*/
409: PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
410: {
411: PetscFunctionBegin;
414: PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
415: sf->rankorder = flg;
416: PetscFunctionReturn(PETSC_SUCCESS);
417: }
419: /*@C
420: PetscSFSetGraph - Set a parallel star forest
422: Collective
424: Input Parameters:
425: + sf - star forest
426: . nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
427: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
428: . ilocal - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced
429: during setup in debug mode)
430: . localmode - copy mode for `ilocal`
431: . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
432: during setup in debug mode)
433: - remotemode - copy mode for `iremote`
435: Level: intermediate
437: Notes:
438: Leaf indices in `ilocal` must be unique; otherwise an error occurs.
440: Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
441: In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
442: PETSc might modify the respective array;
443: if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
444: Only if `PETSC_COPY_VALUES` is used is the respective array guaranteed to stay intact, and a const array can be passed (but a cast to non-const is needed).
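  Example Usage:
  A minimal sketch of a hypothetical graph on a two-process communicator, where each process stores its two leaves
  contiguously (so `ilocal` is `NULL`) and connects them to the two roots owned by the other process:
.vb
  PetscInt    nroots = 2, nleaves = 2;
  PetscSFNode remote[2];
  PetscMPIInt rank;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
  remote[0].rank = 1 - rank; remote[0].index = 0; /* leaf 0 -> root 0 of the other process */
  remote[1].rank = 1 - rank; remote[1].index = 1; /* leaf 1 -> root 1 of the other process */
  PetscCall(PetscSFSetGraph(sf, nroots, nleaves, NULL, PETSC_COPY_VALUES, remote, PETSC_COPY_VALUES));
.ve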
446: Fortran Notes:
447: In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.
449: Developer Notes:
450: We sort leaves to check for duplicates and contiguity and to find minleaf/maxleaf.
451: This also allows one to compare the leaf sets of two `PetscSF`s easily.
453: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
454: @*/
455: PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode)
456: {
457: PetscBool unique, contiguous;
459: PetscFunctionBegin;
461: if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
462: if (nleaves > 0) PetscAssertPointer(iremote, 6);
463: PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
464: PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
465: /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
466: * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
467: PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
468: PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);
470: if (sf->nroots >= 0) { /* Reset only if graph already set */
471: PetscCall(PetscSFReset(sf));
472: }
474: PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));
476: sf->nroots = nroots;
477: sf->nleaves = nleaves;
479: if (localmode == PETSC_COPY_VALUES && ilocal) {
480: PetscInt *tlocal = NULL;
482: PetscCall(PetscMalloc1(nleaves, &tlocal));
483: PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
484: ilocal = tlocal;
485: }
486: if (remotemode == PETSC_COPY_VALUES) {
487: PetscSFNode *tremote = NULL;
489: PetscCall(PetscMalloc1(nleaves, &tremote));
490: PetscCall(PetscArraycpy(tremote, iremote, nleaves));
491: iremote = tremote;
492: }
494: if (nleaves && ilocal) {
495: PetscSFNode work;
497: PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
498: PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
499: unique = PetscNot(unique);
500: PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
501: sf->minleaf = ilocal[0];
502: sf->maxleaf = ilocal[nleaves - 1];
503: contiguous = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
504: } else {
505: sf->minleaf = 0;
506: sf->maxleaf = nleaves - 1;
507: unique = PETSC_TRUE;
508: contiguous = PETSC_TRUE;
509: }
511: if (contiguous) {
512: if (localmode == PETSC_USE_POINTER) {
513: ilocal = NULL;
514: } else {
515: PetscCall(PetscFree(ilocal));
516: }
517: }
518: sf->mine = ilocal;
519: if (localmode == PETSC_USE_POINTER) {
520: sf->mine_alloc = NULL;
521: } else {
522: sf->mine_alloc = ilocal;
523: }
524: sf->remote = iremote;
525: if (remotemode == PETSC_USE_POINTER) {
526: sf->remote_alloc = NULL;
527: } else {
528: sf->remote_alloc = iremote;
529: }
530: PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
531: sf->graphset = PETSC_TRUE;
532: PetscFunctionReturn(PETSC_SUCCESS);
533: }
535: /*@
536: PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern
538: Collective
540: Input Parameters:
541: + sf - The `PetscSF`
542: . map - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
543: - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`
545: Level: intermediate
547: Notes:
548: It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`.
549: `n` and `N` are the local and global sizes of `x` respectively.
551: With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph such that, if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, `x` is copied to
552: a sequential vector `y` on every MPI process.
554: With `PETSCSF_PATTERN_GATHER`, the routine creates a graph such that, if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, `x` is copied to a
555: sequential vector `y` on rank 0.
557: In the above cases, entries of `x` are roots and entries of `y` are leaves.
559: With `PETSCSF_PATTERN_ALLTOALL`, `map` is insignificant. Suppose NP is the size of `sf`'s communicator. The routine
560: creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
561: of rank j, where 0 <= i,j < NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that this does
562: not mean one cannot send multiple items; one just needs to create a new MPI datatype for the multiple data
563: items with `MPI_Type_contiguous()` and use that as the <unit> argument in SF routines.
565: In this case, roots and leaves are symmetric.
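  Example Usage:
  A minimal sketch (hypothetical local size n): build an allgather-like `PetscSF` from a layout, so that a later
  `PetscSFBcastBegin()`/`PetscSFBcastEnd()` copies the n local root values to the N leaves on every process:
.vb
  PetscLayout map;
  PetscSF     sf;
  PetscInt    n = 2; /* hypothetical number of locally owned roots */

  PetscCall(PetscLayoutCreate(PETSC_COMM_WORLD, &map));
  PetscCall(PetscLayoutSetLocalSize(map, n));
  PetscCall(PetscLayoutSetUp(map));
  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  PetscCall(PetscSFSetGraphWithPattern(sf, map, PETSCSF_PATTERN_ALLGATHER));
  PetscCall(PetscLayoutDestroy(&map)); /* the PetscSF keeps its own reference to the layout */
.ve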
567: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
568: @*/
569: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
570: {
571: MPI_Comm comm;
572: PetscInt n, N, res[2];
573: PetscMPIInt rank, size;
574: PetscSFType type;
576: PetscFunctionBegin;
578: if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
579: PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
580: PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
581: PetscCallMPI(MPI_Comm_rank(comm, &rank));
582: PetscCallMPI(MPI_Comm_size(comm, &size));
584: if (pattern == PETSCSF_PATTERN_ALLTOALL) {
585: type = PETSCSFALLTOALL;
586: PetscCall(PetscLayoutCreate(comm, &sf->map));
587: PetscCall(PetscLayoutSetLocalSize(sf->map, size));
588: PetscCall(PetscLayoutSetSize(sf->map, ((PetscInt)size) * size));
589: PetscCall(PetscLayoutSetUp(sf->map));
590: } else {
591: PetscCall(PetscLayoutGetLocalSize(map, &n));
592: PetscCall(PetscLayoutGetSize(map, &N));
593: res[0] = n;
594: res[1] = -n;
595: /* Check if n is the same over all ranks so that we can optimize it */
596: PetscCall(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
597: if (res[0] == -res[1]) { /* same n */
598: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
599: } else {
600: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
601: }
602: PetscCall(PetscLayoutReference(map, &sf->map));
603: }
604: PetscCall(PetscSFSetType(sf, type));
606: sf->pattern = pattern;
607: sf->mine = NULL; /* Contiguous */
609: /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
610: Also set other easy stuff.
611: */
612: if (pattern == PETSCSF_PATTERN_ALLGATHER) {
613: sf->nleaves = N;
614: sf->nroots = n;
615: sf->nranks = size;
616: sf->minleaf = 0;
617: sf->maxleaf = N - 1;
618: } else if (pattern == PETSCSF_PATTERN_GATHER) {
619: sf->nleaves = rank ? 0 : N;
620: sf->nroots = n;
621: sf->nranks = rank ? 0 : size;
622: sf->minleaf = 0;
623: sf->maxleaf = rank ? -1 : N - 1;
624: } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
625: sf->nleaves = size;
626: sf->nroots = size;
627: sf->nranks = size;
628: sf->minleaf = 0;
629: sf->maxleaf = size - 1;
630: }
631: sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
632: sf->graphset = PETSC_TRUE;
633: PetscFunctionReturn(PETSC_SUCCESS);
634: }
636: /*@
637: PetscSFCreateInverseSF - given a `PetscSF` in which all roots have degree 1, creates the inverse map
639: Collective
641: Input Parameter:
642: . sf - star forest to invert
644: Output Parameter:
645: . isf - inverse of `sf`
647: Level: advanced
649: Notes:
650: All roots must have degree 1.
652: The local space may be a permutation, but cannot be sparse.
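  Example Usage:
  A minimal sketch: invert a one-to-one forest so that former roots become leaves and vice versa.
.vb
  PetscSF isf;

  PetscCall(PetscSFCreateInverseSF(sf, &isf));
  /* data can now be moved root-to-leaf on isf, which is leaf-to-root on sf */
  PetscCall(PetscSFDestroy(&isf));
.ve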
654: .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
655: @*/
656: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
657: {
658: PetscMPIInt rank;
659: PetscInt i, nroots, nleaves, maxlocal, count, *newilocal;
660: const PetscInt *ilocal;
661: PetscSFNode *roots, *leaves;
663: PetscFunctionBegin;
665: PetscSFCheckGraphSet(sf, 1);
666: PetscAssertPointer(isf, 2);
668: PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
669: maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
671: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
672: PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
673: for (i = 0; i < maxlocal; i++) {
674: leaves[i].rank = rank;
675: leaves[i].index = i;
676: }
677: for (i = 0; i < nroots; i++) {
678: roots[i].rank = -1;
679: roots[i].index = -1;
680: }
681: PetscCall(PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));
682: PetscCall(PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));
684: /* Check whether our leaves are sparse */
685: for (i = 0, count = 0; i < nroots; i++)
686: if (roots[i].rank >= 0) count++;
687: if (count == nroots) newilocal = NULL;
688: else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal));
689: for (i = 0, count = 0; i < nroots; i++) {
690: if (roots[i].rank >= 0) {
691: newilocal[count] = i;
692: roots[count].rank = roots[i].rank;
693: roots[count].index = roots[i].index;
694: count++;
695: }
696: }
697: }
699: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
700: PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
701: PetscCall(PetscFree2(roots, leaves));
702: PetscFunctionReturn(PETSC_SUCCESS);
703: }
705: /*@
706: PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph
708: Collective
710: Input Parameters:
711: + sf - communication object to duplicate
712: - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)
714: Output Parameter:
715: . newsf - new communication object
717: Level: beginner
719: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
720: @*/
721: PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
722: {
723: PetscSFType type;
724: MPI_Datatype dtype = MPIU_SCALAR;
726: PetscFunctionBegin;
729: PetscAssertPointer(newsf, 3);
730: PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
731: PetscCall(PetscSFGetType(sf, &type));
732: if (type) PetscCall(PetscSFSetType(*newsf, type));
733: (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
734: if (opt == PETSCSF_DUPLICATE_GRAPH) {
735: PetscSFCheckGraphSet(sf, 1);
736: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
737: PetscInt nroots, nleaves;
738: const PetscInt *ilocal;
739: const PetscSFNode *iremote;
740: PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
741: PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
742: } else {
743: PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
744: }
745: }
746: /* Since oldtype is committed, so is newtype, according to MPI */
747: if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
748: (*newsf)->vscat.bs = sf->vscat.bs;
749: (*newsf)->vscat.unit = dtype;
750: (*newsf)->vscat.to_n = sf->vscat.to_n;
751: (*newsf)->vscat.from_n = sf->vscat.from_n;
752: /* Do not copy lsf. Build it on demand since it is rarely used */
754: #if defined(PETSC_HAVE_DEVICE)
755: (*newsf)->backend = sf->backend;
756: (*newsf)->unknown_input_stream = sf->unknown_input_stream;
757: (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi;
758: (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
759: #endif
760: PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
761: /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
762: PetscFunctionReturn(PETSC_SUCCESS);
763: }
765: /*@C
766: PetscSFGetGraph - Get the graph specifying a parallel star forest
768: Not Collective
770: Input Parameter:
771: . sf - star forest
773: Output Parameters:
774: + nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
775: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
776: . ilocal - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage)
777: - iremote - remote locations of root vertices for each leaf on the current process
779: Level: intermediate
781: Notes:
782: We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet.
784: The returned `ilocal` and `iremote` might contain values in a different order than the input ones in `PetscSFSetGraph()`
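  Example Usage:
  A minimal sketch of iterating over the leaves while accounting for contiguous storage (`ilocal` == `NULL`):
.vb
  PetscInt           nroots, nleaves;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;

  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
  for (PetscInt i = 0; i < nleaves; i++) {
    const PetscInt leaf = ilocal ? ilocal[i] : i; /* location of leaf i in the leafdata buffer */
    /* leaf is connected to root iremote[i].index on rank iremote[i].rank */
  }
.ve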
786: Fortran Notes:
787: The returned `iremote` array is a copy and must be deallocated after use. Consequently, if you
788: want to update the graph, you must call `PetscSFSetGraph()` after modifying the `iremote` array.
790: To check for a `NULL` `ilocal` use
791: $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
793: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
794: @*/
795: PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
796: {
797: PetscFunctionBegin;
799: if (sf->ops->GetGraph) {
800: PetscCall((sf->ops->GetGraph)(sf, nroots, nleaves, ilocal, iremote));
801: } else {
802: if (nroots) *nroots = sf->nroots;
803: if (nleaves) *nleaves = sf->nleaves;
804: if (ilocal) *ilocal = sf->mine;
805: if (iremote) *iremote = sf->remote;
806: }
807: PetscFunctionReturn(PETSC_SUCCESS);
808: }
810: /*@
811: PetscSFGetLeafRange - Get the active leaf ranges
813: Not Collective
815: Input Parameter:
816: . sf - star forest
818: Output Parameters:
819: + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
820: - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.
822: Level: developer
824: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
825: @*/
826: PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
827: {
828: PetscFunctionBegin;
830: PetscSFCheckGraphSet(sf, 1);
831: if (minleaf) *minleaf = sf->minleaf;
832: if (maxleaf) *maxleaf = sf->maxleaf;
833: PetscFunctionReturn(PETSC_SUCCESS);
834: }
836: /*@C
837: PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database
839: Collective
841: Input Parameters:
842: + A - the star forest
843: . obj - Optional object that provides the prefix for the option names
844: - name - command line option
846: Level: intermediate
848: Note:
849: See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`
851: .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
852: @*/
853: PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
854: {
855: PetscFunctionBegin;
857: PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
858: PetscFunctionReturn(PETSC_SUCCESS);
859: }
861: /*@C
862: PetscSFView - view a star forest
864: Collective
866: Input Parameters:
867: + sf - star forest
868: - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`
870: Level: beginner
872: .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
873: @*/
874: PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
875: {
876: PetscBool iascii;
877: PetscViewerFormat format;
879: PetscFunctionBegin;
881: if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
883: PetscCheckSameComm(sf, 1, viewer, 2);
884: if (sf->graphset) PetscCall(PetscSFSetUp(sf));
885: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
886: if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
887: PetscMPIInt rank;
888: PetscInt ii, i, j;
890: PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
891: PetscCall(PetscViewerASCIIPushTab(viewer));
892: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
893: if (!sf->graphset) {
894: PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
895: PetscCall(PetscViewerASCIIPopTab(viewer));
896: PetscFunctionReturn(PETSC_SUCCESS);
897: }
898: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
899: PetscCall(PetscViewerASCIIPushSynchronized(viewer));
900: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks));
901: for (i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
902: PetscCall(PetscViewerFlush(viewer));
903: PetscCall(PetscViewerGetFormat(viewer, &format));
904: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
905: PetscMPIInt *tmpranks, *perm;
906: PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
907: PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
908: for (i = 0; i < sf->nranks; i++) perm[i] = i;
909: PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
910: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
911: for (ii = 0; ii < sf->nranks; ii++) {
912: i = perm[ii];
913: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
914: for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
915: }
916: PetscCall(PetscFree2(tmpranks, perm));
917: }
918: PetscCall(PetscViewerFlush(viewer));
919: PetscCall(PetscViewerASCIIPopSynchronized(viewer));
920: }
921: PetscCall(PetscViewerASCIIPopTab(viewer));
922: }
923: PetscTryTypeMethod(sf, View, viewer);
924: PetscFunctionReturn(PETSC_SUCCESS);
925: }
927: /*@C
928: PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
930: Not Collective
932: Input Parameter:
933: . sf - star forest
935: Output Parameters:
936: + nranks - number of ranks referenced by local part
937: . ranks - [`nranks`] array of ranks
938: . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank
939: . rmine - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank
940: - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank
942: Level: developer
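  Example Usage:
  A minimal sketch (after `PetscSFSetUp()`) of walking the per-rank blocks of `rmine`/`rremote` delimited by `roffset`:
.vb
  PetscInt           nranks;
  const PetscMPIInt *ranks;
  const PetscInt    *roffset, *rmine, *rremote;

  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &roffset, &rmine, &rremote));
  for (PetscInt r = 0; r < nranks; r++) {
    for (PetscInt j = roffset[r]; j < roffset[r + 1]; j++) {
      /* local leaf index rmine[j] references root rremote[j] on rank ranks[r] */
    }
  }
.ve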
944: .seealso: `PetscSF`, `PetscSFGetLeafRanks()`
945: @*/
946: PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote)
947: {
948: PetscFunctionBegin;
950: PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
951: if (sf->ops->GetRootRanks) {
952: PetscCall((sf->ops->GetRootRanks)(sf, nranks, ranks, roffset, rmine, rremote));
953: } else {
954: /* The generic implementation */
955: if (nranks) *nranks = sf->nranks;
956: if (ranks) *ranks = sf->ranks;
957: if (roffset) *roffset = sf->roffset;
958: if (rmine) *rmine = sf->rmine;
959: if (rremote) *rremote = sf->rremote;
960: }
961: PetscFunctionReturn(PETSC_SUCCESS);
962: }
964: /*@C
965: PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
967: Not Collective
969: Input Parameter:
970: . sf - star forest
972: Output Parameters:
973: + niranks - number of leaf ranks referencing roots on this process
974: . iranks - [`niranks`] array of ranks
975: . ioffset - [`niranks`+1] offset in `irootloc` for each rank
976: - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank
978: Level: developer
980: .seealso: `PetscSF`, `PetscSFGetRootRanks()`
981: @*/
982: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
983: {
984: PetscFunctionBegin;
986: PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
987: if (sf->ops->GetLeafRanks) {
988: PetscCall((sf->ops->GetLeafRanks)(sf, niranks, iranks, ioffset, irootloc));
989: } else {
990: PetscSFType type;
991: PetscCall(PetscSFGetType(sf, &type));
992: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
993: }
994: PetscFunctionReturn(PETSC_SUCCESS);
995: }
997: static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
998: {
999: PetscInt i;
1000: for (i = 0; i < n; i++) {
1001: if (needle == list[i]) return PETSC_TRUE;
1002: }
1003: return PETSC_FALSE;
1004: }
1006: /*@C
1007: PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations.
1009: Collective
1011: Input Parameters:
1012: + sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
1013: - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)
1015: Level: developer
1017: .seealso: `PetscSF`, `PetscSFGetRootRanks()`
1018: @*/
1019: PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
1020: {
1021: PetscHMapI table;
1022: PetscHashIter pos;
1023: PetscMPIInt size, groupsize, *groupranks;
1024: PetscInt *rcount, *ranks;
1025: PetscInt i, irank = -1, orank = -1;
1027: PetscFunctionBegin;
1029: PetscSFCheckGraphSet(sf, 1);
1030: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
1031: PetscCall(PetscHMapICreateWithSize(10, &table));
1032: for (i = 0; i < sf->nleaves; i++) {
1033: /* Log 1-based rank */
1034: PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
1035: }
1036: PetscCall(PetscHMapIGetSize(table, &sf->nranks));
1037: PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
1038: PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
1039: PetscHashIterBegin(table, pos);
1040: for (i = 0; i < sf->nranks; i++) {
1041: PetscHashIterGetKey(table, pos, ranks[i]);
1042: PetscHashIterGetVal(table, pos, rcount[i]);
1043: PetscHashIterNext(table, pos);
1044: ranks[i]--; /* Convert back to 0-based */
1045: }
1046: PetscCall(PetscHMapIDestroy(&table));
1048: /* We expect that dgroup is reliably "small" while nranks could be large */
1049: {
1050: MPI_Group group = MPI_GROUP_NULL;
1051: PetscMPIInt *dgroupranks;
1052: PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1053: PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
1054: PetscCall(PetscMalloc1(groupsize, &dgroupranks));
1055: PetscCall(PetscMalloc1(groupsize, &groupranks));
1056: for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
1057: if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
1058: PetscCallMPI(MPI_Group_free(&group));
1059: PetscCall(PetscFree(dgroupranks));
1060: }
1062: /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1063: for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1064: for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1065: if (InList(ranks[i], groupsize, groupranks)) break;
1066: }
1067: for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1068: if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1069: }
1070: if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1071: PetscInt tmprank, tmpcount;
1073: tmprank = ranks[i];
1074: tmpcount = rcount[i];
1075: ranks[i] = ranks[sf->ndranks];
1076: rcount[i] = rcount[sf->ndranks];
1077: ranks[sf->ndranks] = tmprank;
1078: rcount[sf->ndranks] = tmpcount;
1079: sf->ndranks++;
1080: }
1081: }
1082: PetscCall(PetscFree(groupranks));
1083: PetscCall(PetscSortIntWithArray(sf->ndranks, ranks, rcount));
1084: if (rcount) PetscCall(PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
1085: sf->roffset[0] = 0;
1086: for (i = 0; i < sf->nranks; i++) {
1087: PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
1088: sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
1089: rcount[i] = 0;
1090: }
1091: for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1092: /* short circuit */
1093: if (orank != sf->remote[i].rank) {
1094: /* Search for index of iremote[i].rank in sf->ranks */
1095: PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank));
1096: if (irank < 0) {
1097: PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
1098: if (irank >= 0) irank += sf->ndranks;
1099: }
1100: orank = sf->remote[i].rank;
1101: }
1102: PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %" PetscInt_FMT " in array", sf->remote[i].rank);
1103: sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i;
1104: sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1105: rcount[irank]++;
1106: }
1107: PetscCall(PetscFree2(rcount, ranks));
1108: PetscFunctionReturn(PETSC_SUCCESS);
1109: }
1111: /*@C
1112: PetscSFGetGroups - gets incoming and outgoing process groups
1114: Collective
1116: Input Parameter:
1117: . sf - star forest
1119: Output Parameters:
1120: + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1121: - outgoing - group of destination processes for outgoing edges (roots that I reference)
1123: Level: developer
1125: .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
1126: @*/
1127: PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1128: {
1129: MPI_Group group = MPI_GROUP_NULL;
1131: PetscFunctionBegin;
1132: PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
1133: if (sf->ingroup == MPI_GROUP_NULL) {
1134: PetscInt i;
1135: const PetscInt *indegree;
1136: PetscMPIInt rank, *outranks, *inranks;
1137: PetscSFNode *remote;
1138: PetscSF bgcount;
1140: /* Compute the number of incoming ranks */
1141: PetscCall(PetscMalloc1(sf->nranks, &remote));
1142: for (i = 0; i < sf->nranks; i++) {
1143: remote[i].rank = sf->ranks[i];
1144: remote[i].index = 0;
1145: }
1146: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
1147: PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1148: PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
1149: PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
1150: /* Enumerate the incoming ranks */
1151: PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
1152: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1153: for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
1154: PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
1155: PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
1156: PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1157: PetscCallMPI(MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup));
1158: PetscCallMPI(MPI_Group_free(&group));
1159: PetscCall(PetscFree2(inranks, outranks));
1160: PetscCall(PetscSFDestroy(&bgcount));
1161: }
1162: *incoming = sf->ingroup;
1164: if (sf->outgroup == MPI_GROUP_NULL) {
1165: PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1166: PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
1167: PetscCallMPI(MPI_Group_free(&group));
1168: }
1169: *outgoing = sf->outgroup;
1170: PetscFunctionReturn(PETSC_SUCCESS);
1171: }
1173: /*@
1174: PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters
1176: Collective
1178: Input Parameter:
1179: . sf - star forest that may contain roots with 0 or with more than 1 vertex
1181: Output Parameter:
1182: . multi - star forest with split roots, such that each root has degree exactly 1
1184: Level: developer
1186: Note:
1187: In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
1188: directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1189: edge, it is a candidate for future optimization that might involve its removal.
1191: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
1192: @*/
1193: PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1194: {
1195: PetscFunctionBegin;
1197: PetscAssertPointer(multi, 2);
1198: if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1199: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1200: *multi = sf->multi;
1201: sf->multi->multi = sf->multi;
1202: PetscFunctionReturn(PETSC_SUCCESS);
1203: }
1204: if (!sf->multi) {
1205: const PetscInt *indegree;
1206: PetscInt i, *inoffset, *outones, *outoffset, maxlocal;
1207: PetscSFNode *remote;
1208: maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
1209: PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
1210: PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
1211: PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
1212: inoffset[0] = 0;
1213: for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
1214: for (i = 0; i < maxlocal; i++) outones[i] = 1;
1215: PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1216: PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1217: for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1218: if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
1219: for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
1220: }
1221: PetscCall(PetscMalloc1(sf->nleaves, &remote));
1222: for (i = 0; i < sf->nleaves; i++) {
1223: remote[i].rank = sf->remote[i].rank;
1224: remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1225: }
1226: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1227: sf->multi->multi = sf->multi;
1228: PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1229: if (sf->rankorder) { /* Sort the ranks */
1230: PetscMPIInt rank;
1231: PetscInt *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
1232: PetscSFNode *newremote;
1233: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1234: for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
1235: PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
1236: for (i = 0; i < maxlocal; i++) outranks[i] = rank;
1237: PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1238: PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1239: /* Sort the incoming ranks at each vertex, build the inverse map */
1240: for (i = 0; i < sf->nroots; i++) {
1241: PetscInt j;
1242: for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
1243: if (inranks) PetscCall(PetscSortIntWithArray(indegree[i], inranks + inoffset[i], tmpoffset));
1244: for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1245: }
1246: PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1247: PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1248: PetscCall(PetscMalloc1(sf->nleaves, &newremote));
1249: for (i = 0; i < sf->nleaves; i++) {
1250: newremote[i].rank = sf->remote[i].rank;
1251: newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1252: }
1253: PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
1254: PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
1255: }
1256: PetscCall(PetscFree3(inoffset, outones, outoffset));
1257: }
1258: *multi = sf->multi;
1259: PetscFunctionReturn(PETSC_SUCCESS);
1260: }
1262: /*@C
1263: PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices
1265: Collective
1267: Input Parameters:
1268: + sf - original star forest
1269: . nselected - number of selected roots on this process
1270: - selected - indices of the selected roots on this process
1272: Output Parameter:
1273: . esf - new star forest
1275: Level: advanced
1277: Note:
1278: To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
1279: be done by calling `PetscSFGetGraph()`.
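  Example Usage:
  A minimal sketch (hypothetical selection, assuming at least three local roots): keep only edges rooted at local
  roots 0 and 2, then query which leaves are still participating.
.vb
  const PetscInt  selected[] = {0, 2};
  PetscSF         esf;
  PetscInt        enleaves;
  const PetscInt *eilocal;

  PetscCall(PetscSFCreateEmbeddedRootSF(sf, 2, selected, &esf));
  PetscCall(PetscSFGetGraph(esf, NULL, &enleaves, &eilocal, NULL));
  /* eilocal (or 0..enleaves-1 if eilocal is NULL) lists the leaves that survived */
  PetscCall(PetscSFDestroy(&esf));
.ve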
1281: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1282: @*/
1283: PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
1284: {
1285: PetscInt i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1286: const PetscInt *ilocal;
1287: signed char *rootdata, *leafdata, *leafmem;
1288: const PetscSFNode *iremote;
1289: PetscSFNode *new_iremote;
1290: MPI_Comm comm;
1292: PetscFunctionBegin;
1294: PetscSFCheckGraphSet(sf, 1);
1295: if (nselected) PetscAssertPointer(selected, 3);
1296: PetscAssertPointer(esf, 4);
1298: PetscCall(PetscSFSetUp(sf));
1299: PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
1300: PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1301: PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
1303: if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1304: PetscBool dups;
1305: PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
1306: PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
1307: for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
1308: }
1310: if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1311: else {
1312: /* A generic version of creating embedded sf */
1313: PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
1314: maxlocal = maxleaf - minleaf + 1;
1315: PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
1316: leafdata = leafmem - minleaf;
1317: /* Tag selected roots and bcast to leaves */
1318: for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
1319: PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1320: PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1322: /* Build esf with leaves that are still connected */
1323: esf_nleaves = 0;
1324: for (i = 0; i < nleaves; i++) {
1325: j = ilocal ? ilocal[i] : i;
1326: /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1327: with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1328: */
1329: esf_nleaves += (leafdata[j] ? 1 : 0);
1330: }
1331: PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
1332: PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
1333: for (i = n = 0; i < nleaves; i++) {
1334: j = ilocal ? ilocal[i] : i;
1335: if (leafdata[j]) {
1336: new_ilocal[n] = j;
1337: new_iremote[n].rank = iremote[i].rank;
1338: new_iremote[n].index = iremote[i].index;
1339: ++n;
1340: }
1341: }
1342: PetscCall(PetscSFCreate(comm, esf));
1343: PetscCall(PetscSFSetFromOptions(*esf));
1344: PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1345: PetscCall(PetscFree2(rootdata, leafmem));
1346: }
1347: PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
1348: PetscFunctionReturn(PETSC_SUCCESS);
1349: }
1351: /*@C
1352: PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices
1354: Collective
1356: Input Parameters:
1357: + sf - original star forest
1358: . nselected - number of selected leaves on this process
1359: - selected - indices of the selected leaves on this process
1361: Output Parameter:
1362: . newsf - new star forest
1364: Level: advanced
1366: .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1367: @*/
1368: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
1369: {
1370: const PetscSFNode *iremote;
1371: PetscSFNode *new_iremote;
1372: const PetscInt *ilocal;
1373: PetscInt i, nroots, *leaves, *new_ilocal;
1374: MPI_Comm comm;
1376: PetscFunctionBegin;
1378: PetscSFCheckGraphSet(sf, 1);
1379: if (nselected) PetscAssertPointer(selected, 3);
1380: PetscAssertPointer(newsf, 4);
1382: /* Uniq selected[] and put results in leaves[] */
1383: PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1384: PetscCall(PetscMalloc1(nselected, &leaves));
1385: PetscCall(PetscArraycpy(leaves, selected, nselected));
1386: PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves));
1387: PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);
1389: /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1390: if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1391: else {
1392: PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
1393: PetscCall(PetscMalloc1(nselected, &new_ilocal));
1394: PetscCall(PetscMalloc1(nselected, &new_iremote));
1395: for (i = 0; i < nselected; ++i) {
1396: const PetscInt l = leaves[i];
1397: new_ilocal[i] = ilocal ? ilocal[l] : l;
1398: new_iremote[i].rank = iremote[l].rank;
1399: new_iremote[i].index = iremote[l].index;
1400: }
1401: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
1402: PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1403: }
1404: PetscCall(PetscFree(leaves));
1405: PetscFunctionReturn(PETSC_SUCCESS);
1406: }
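/*
   A minimal usage sketch (illustrative; `sf`, `rootdata`, and `leafdata` are hypothetical caller-owned
   variables, not part of this file): keep only two local leaves of a graph-set PetscSF and communicate
   on the embedded SF.

     PetscSF        esf;
     const PetscInt selected[] = {0, 2};   // sorted local leaf indices to keep (hypothetical values)

     PetscCall(PetscSFCreateEmbeddedLeafSF(sf, 2, selected, &esf));
     PetscCall(PetscSFBcastBegin(esf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
     PetscCall(PetscSFBcastEnd(esf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
     PetscCall(PetscSFDestroy(&esf));
*/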
1408: /*@C
1409:   PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with a call to `PetscSFBcastEnd()`
1411: Collective
1413: Input Parameters:
1414: + sf - star forest on which to communicate
1415: . unit - data type associated with each node
1416: . rootdata - buffer to broadcast
1417: - op - operation to use for reduction
1419: Output Parameter:
1420: . leafdata - buffer to be reduced with values from each leaf's respective root
1422: Level: intermediate
1424: Note:
1425:   When PETSc is configured with device support, it uses its own mechanism to determine whether the given data pointers
1426:   are host pointers or device pointers, which may incur a noticeable cost. If you already know this information,
1427:   use `PetscSFBcastWithMemTypeBegin()` instead.
1429: .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1430: @*/
1431: PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1432: {
1433: PetscMemType rootmtype, leafmtype;
1435: PetscFunctionBegin;
1437: PetscCall(PetscSFSetUp(sf));
1438: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1439: PetscCall(PetscGetMemType(rootdata, &rootmtype));
1440: PetscCall(PetscGetMemType(leafdata, &leafmtype));
1441: PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1442: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1443: PetscFunctionReturn(PETSC_SUCCESS);
1444: }
1446: /*@C
1447:   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with a call
1448:   to `PetscSFBcastEnd()`
1450: Collective
1452: Input Parameters:
1453: + sf - star forest on which to communicate
1454: . unit - data type associated with each node
1455: . rootmtype - memory type of rootdata
1456: . rootdata - buffer to broadcast
1457: . leafmtype - memory type of leafdata
1458: - op - operation to use for reduction
1460: Output Parameter:
1461: . leafdata - buffer to be reduced with values from each leaf's respective root
1463: Level: intermediate
1465: .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1466: @*/
1467: PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1468: {
1469: PetscFunctionBegin;
1471: PetscCall(PetscSFSetUp(sf));
1472: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1473: PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1474: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1475: PetscFunctionReturn(PETSC_SUCCESS);
1476: }
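/*
   A minimal usage sketch (illustrative; `sf`, `rootdata`, and `leafdata` are hypothetical host buffers):
   when the caller already knows where the buffers reside, the memory-type query can be skipped by
   passing the types explicitly.

     PetscCall(PetscSFBcastWithMemTypeBegin(sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, rootdata, PETSC_MEMTYPE_HOST, leafdata, MPI_REPLACE));
     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
*/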
1478: /*@C
1479: PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`
1481: Collective
1483: Input Parameters:
1484: + sf - star forest
1485: . unit - data type
1486: . rootdata - buffer to broadcast
1487: - op - operation to use for reduction
1489: Output Parameter:
1490: . leafdata - buffer to be reduced with values from each leaf's respective root
1492: Level: intermediate
1494: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
1495: @*/
1496: PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1497: {
1498: PetscFunctionBegin;
1500: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
1501: PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
1502: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
1503: PetscFunctionReturn(PETSC_SUCCESS);
1504: }
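/*
   A minimal usage sketch (illustrative; `sf`, `rootdata`, and `leafdata` are hypothetical caller-owned
   arrays sized for the roots and leaves of `sf`): a complete broadcast from roots to leaves.

     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
     // unrelated local work may overlap with the communication here
     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
*/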
1506: /*@C
1507:   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with a call to `PetscSFReduceEnd()`
1509: Collective
1511: Input Parameters:
1512: + sf - star forest
1513: . unit - data type
1514: . leafdata - values to reduce
1515: - op - reduction operation
1517: Output Parameter:
1518: . rootdata - result of reduction of values from all leaves of each root
1520: Level: intermediate
1522: Note:
1523:   When PETSc is configured with device support, it uses its own mechanism to determine whether the given data pointers
1524:   are host pointers or device pointers, which may incur a noticeable cost. If you already know this information,
1525:   use `PetscSFReduceWithMemTypeBegin()` instead.
1527: .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`
1528: @*/
1529: PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1530: {
1531: PetscMemType rootmtype, leafmtype;
1533: PetscFunctionBegin;
1535: PetscCall(PetscSFSetUp(sf));
1536: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1537: PetscCall(PetscGetMemType(rootdata, &rootmtype));
1538: PetscCall(PetscGetMemType(leafdata, &leafmtype));
1539: PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1540: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1541: PetscFunctionReturn(PETSC_SUCCESS);
1542: }
1544: /*@C
1545:   PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with a call to `PetscSFReduceEnd()`
1547: Collective
1549: Input Parameters:
1550: + sf - star forest
1551: . unit - data type
1552: . leafmtype - memory type of leafdata
1553: . leafdata - values to reduce
1554: . rootmtype - memory type of rootdata
1555: - op - reduction operation
1557: Output Parameter:
1558: . rootdata - result of reduction of values from all leaves of each root
1560: Level: intermediate
1562: .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
1563: @*/
1564: PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1565: {
1566: PetscFunctionBegin;
1568: PetscCall(PetscSFSetUp(sf));
1569: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1570: PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1571: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1572: PetscFunctionReturn(PETSC_SUCCESS);
1573: }
1575: /*@C
1576: PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`
1578: Collective
1580: Input Parameters:
1581: + sf - star forest
1582: . unit - data type
1583: . leafdata - values to reduce
1584: - op - reduction operation
1586: Output Parameter:
1587: . rootdata - result of reduction of values from all leaves of each root
1589: Level: intermediate
1591: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
1592: @*/
1593: PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1594: {
1595: PetscFunctionBegin;
1597: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1598: PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
1599: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1600: PetscFunctionReturn(PETSC_SUCCESS);
1601: }
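/*
   A minimal usage sketch (illustrative; `sf`, `leafdata`, `rootdata`, and `nroots` are hypothetical
   caller-owned variables): sum one PetscInt contributed by each leaf into its root. Leaf values are
   combined into the existing contents of rootdata, so it is typically zeroed first.

     PetscCall(PetscArrayzero(rootdata, nroots));
     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, leafdata, rootdata, MPI_SUM));
     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, leafdata, rootdata, MPI_SUM));
*/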
1603: /*@C
1604: PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value,
1605: to be completed with `PetscSFFetchAndOpEnd()`
1607: Collective
1609: Input Parameters:
1610: + sf - star forest
1611: . unit - data type
1612: . leafdata - leaf values to use in reduction
1613: - op - operation to use for reduction
1615: Output Parameters:
1616: + rootdata - root values to be updated, input state is seen by first process to perform an update
1617: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1619: Level: advanced
1621: Note:
1622: The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1623: might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1624:   not guaranteed across the whole vertex. Therefore, this function is typically used only with primitive types such as
1625: integers.
1627: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1628: @*/
1629: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1630: {
1631: PetscMemType rootmtype, leafmtype, leafupdatemtype;
1633: PetscFunctionBegin;
1635: PetscCall(PetscSFSetUp(sf));
1636: PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1637: PetscCall(PetscGetMemType(rootdata, &rootmtype));
1638: PetscCall(PetscGetMemType(leafdata, &leafmtype));
1639: PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
1640: PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1641: PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1642: PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1643: PetscFunctionReturn(PETSC_SUCCESS);
1644: }
1646: /*@C
1647: PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
1648: applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()`
1650: Collective
1652: Input Parameters:
1653: + sf - star forest
1654: . unit - data type
1655: . rootmtype - memory type of rootdata
1656: . leafmtype - memory type of leafdata
1657: . leafdata - leaf values to use in reduction
1658: . leafupdatemtype - memory type of leafupdate
1659: - op - operation to use for reduction
1661: Output Parameters:
1662: + rootdata - root values to be updated, input state is seen by first process to perform an update
1663: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1665: Level: advanced
1667: Note:
1668: See `PetscSFFetchAndOpBegin()` for more details.
1670: .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
1671: @*/
1672: PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1673: {
1674: PetscFunctionBegin;
1676: PetscCall(PetscSFSetUp(sf));
1677: PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1678: PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1679: PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1680: PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1681: PetscFunctionReturn(PETSC_SUCCESS);
1682: }
1684: /*@C
1685: PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
1686: to fetch values from roots and update atomically by applying operation using my leaf value
1688: Collective
1690: Input Parameters:
1691: + sf - star forest
1692: . unit - data type
1693: . leafdata - leaf values to use in reduction
1694: - op - operation to use for reduction
1696: Output Parameters:
1697: + rootdata - root values to be updated, input state is seen by first process to perform an update
1698: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1700: Level: advanced
1702: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
1703: @*/
1704: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1705: {
1706: PetscFunctionBegin;
1708: PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1709: PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
1710: PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1711: PetscFunctionReturn(PETSC_SUCCESS);
1712: }
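/*
   A minimal usage sketch (illustrative; `sf`, `rootdata`, `leafdata`, and `leafupdate` are hypothetical
   caller-owned arrays): atomically add each leaf's value to its root while fetching, into leafupdate,
   the root value seen just before this process's update (a common way to hand out unique offsets).

     PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM));
     PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM));
*/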
1714: /*@C
1715: PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()`
1717: Collective
1719: Input Parameter:
1720: . sf - star forest
1722: Output Parameter:
1723: . degree - degree of each root vertex
1725: Level: advanced
1727: Note:
1728: The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1730: .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
1731: @*/
1732: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree)
1733: {
1734: PetscFunctionBegin;
1736: PetscSFCheckGraphSet(sf, 1);
1737: PetscAssertPointer(degree, 2);
1738: if (!sf->degreeknown) {
1739: PetscInt i, nroots = sf->nroots, maxlocal;
1740: PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1741: maxlocal = sf->maxleaf - sf->minleaf + 1;
1742: PetscCall(PetscMalloc1(nroots, &sf->degree));
1743: PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1744: for (i = 0; i < nroots; i++) sf->degree[i] = 0;
1745: for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
1746: PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1747: }
1748: *degree = NULL;
1749: PetscFunctionReturn(PETSC_SUCCESS);
1750: }
1752: /*@C
1753: PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`
1755: Collective
1757: Input Parameter:
1758: . sf - star forest
1760: Output Parameter:
1761: . degree - degree of each root vertex
1763: Level: developer
1765: Note:
1766: The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1768: .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
1769: @*/
1770: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree)
1771: {
1772: PetscFunctionBegin;
1774: PetscSFCheckGraphSet(sf, 1);
1775: PetscAssertPointer(degree, 2);
1776: if (!sf->degreeknown) {
1777: PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1778: PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1779: PetscCall(PetscFree(sf->degreetmp));
1780: sf->degreeknown = PETSC_TRUE;
1781: }
1782: *degree = sf->degree;
1783: PetscFunctionReturn(PETSC_SUCCESS);
1784: }
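/*
   A minimal usage sketch (illustrative; `sf` is a hypothetical graph-set PetscSF): obtain the number of
   leaves attached to each local root. The returned array is owned by the PetscSF and must not be freed.

     const PetscInt *degree;

     PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
     PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
     // degree[i] is now the degree of local root i, for i in [0, nroots)
*/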
1786: /*@C
1787: PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
1788:   Each multi-root is assigned the index of the corresponding original root.
1790: Collective
1792: Input Parameters:
1793: + sf - star forest
1794: - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`
1796: Output Parameters:
1797: + nMultiRoots - (optional) number of multi-roots (roots of multi-`PetscSF`)
1798: - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`
1800: Level: developer
1802: Note:
1803: The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed.
1805: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1806: @*/
1807: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1808: {
1809: PetscSF msf;
1810: PetscInt i, j, k, nroots, nmroots;
1812: PetscFunctionBegin;
1814: PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
1815: if (nroots) PetscAssertPointer(degree, 2);
1816: if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
1817: PetscAssertPointer(multiRootsOrigNumbering, 4);
1818: PetscCall(PetscSFGetMultiSF(sf, &msf));
1819: PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
1820: PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1821: for (i = 0, j = 0, k = 0; i < nroots; i++) {
1822: if (!degree[i]) continue;
1823: for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1824: }
1825:   PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check failed");
1826: if (nMultiRoots) *nMultiRoots = nmroots;
1827: PetscFunctionReturn(PETSC_SUCCESS);
1828: }
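/*
   A minimal usage sketch (illustrative; `sf` is a hypothetical graph-set PetscSF): recover, for each
   multi-root of the multi-SF, the index of the original root it came from. The output array is newly
   allocated and is freed by the caller.

     const PetscInt *degree;
     PetscInt        nmr, *morig;

     PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
     PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
     PetscCall(PetscSFComputeMultiRootOriginalNumbering(sf, degree, &nmr, &morig));
     // morig[0..nmr) maps each multi-root to its original root index
     PetscCall(PetscFree(morig));
*/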
1830: /*@C
1831: PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`
1833: Collective
1835: Input Parameters:
1836: + sf - star forest
1837: . unit - data type
1838: - leafdata - leaf data to gather to roots
1840: Output Parameter:
1841: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1843: Level: intermediate
1845: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1846: @*/
1847: PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1848: {
1849: PetscSF multi = NULL;
1851: PetscFunctionBegin;
1853: PetscCall(PetscSFSetUp(sf));
1854: PetscCall(PetscSFGetMultiSF(sf, &multi));
1855: PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1856: PetscFunctionReturn(PETSC_SUCCESS);
1857: }
1859: /*@C
1860: PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`
1862: Collective
1864: Input Parameters:
1865: + sf - star forest
1866: . unit - data type
1867: - leafdata - leaf data to gather to roots
1869: Output Parameter:
1870: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1872: Level: intermediate
1874: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1875: @*/
1876: PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1877: {
1878: PetscSF multi = NULL;
1880: PetscFunctionBegin;
1882: PetscCall(PetscSFGetMultiSF(sf, &multi));
1883: PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1884: PetscFunctionReturn(PETSC_SUCCESS);
1885: }
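/*
   A minimal usage sketch (illustrative; `sf`, `leafdata`, and `multirootdata` are hypothetical
   caller-owned arrays): gather one unit from every leaf into its root's slots; multirootdata must
   provide degree[i] units for root i, i.e. a total equal to the sum of the local root degrees.

     PetscCall(PetscSFGatherBegin(sf, MPIU_SCALAR, leafdata, multirootdata));
     PetscCall(PetscSFGatherEnd(sf, MPIU_SCALAR, leafdata, multirootdata));
*/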
1887: /*@C
1888: PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`
1890: Collective
1892: Input Parameters:
1893: + sf - star forest
1894: . unit - data type
1895: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1897: Output Parameter:
1898: . leafdata - leaf data to be updated with personal data from each respective root
1900: Level: intermediate
1902: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
1903: @*/
1904: PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1905: {
1906: PetscSF multi = NULL;
1908: PetscFunctionBegin;
1910: PetscCall(PetscSFSetUp(sf));
1911: PetscCall(PetscSFGetMultiSF(sf, &multi));
1912: PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1913: PetscFunctionReturn(PETSC_SUCCESS);
1914: }
1916: /*@C
1917: PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`
1919: Collective
1921: Input Parameters:
1922: + sf - star forest
1923: . unit - data type
1924: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1926: Output Parameter:
1927: . leafdata - leaf data to be updated with personal data from each respective root
1929: Level: intermediate
1931: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
1932: @*/
1933: PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1934: {
1935: PetscSF multi = NULL;
1937: PetscFunctionBegin;
1939: PetscCall(PetscSFGetMultiSF(sf, &multi));
1940: PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1941: PetscFunctionReturn(PETSC_SUCCESS);
1942: }
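/*
   A minimal usage sketch (illustrative; `sf`, `multirootdata`, and `leafdata` are hypothetical
   caller-owned arrays): the inverse of the gather above, sending one distinct unit from each root's
   multi-root slots back to the corresponding leaf.

     PetscCall(PetscSFScatterBegin(sf, MPIU_SCALAR, multirootdata, leafdata));
     PetscCall(PetscSFScatterEnd(sf, MPIU_SCALAR, multirootdata, leafdata));
*/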
1944: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1945: {
1946: PetscInt i, n, nleaves;
1947: const PetscInt *ilocal = NULL;
1948: PetscHSetI seen;
1950: PetscFunctionBegin;
1951: if (PetscDefined(USE_DEBUG)) {
1952: PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
1953: PetscCall(PetscHSetICreate(&seen));
1954: for (i = 0; i < nleaves; i++) {
1955: const PetscInt leaf = ilocal ? ilocal[i] : i;
1956: PetscCall(PetscHSetIAdd(seen, leaf));
1957: }
1958: PetscCall(PetscHSetIGetSize(seen, &n));
1959: PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
1960: PetscCall(PetscHSetIDestroy(&seen));
1961: }
1962: PetscFunctionReturn(PETSC_SUCCESS);
1963: }
1965: /*@
1966: PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view
1968: Input Parameters:
1969: + sfA - The first `PetscSF`
1970: - sfB - The second `PetscSF`
1972: Output Parameter:
1973: . sfBA - The composite `PetscSF`
1975: Level: developer
1977: Notes:
1978:   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
1979:   forests, i.e., the same leaf is not connected to different roots.
1981:   `sfA`'s leaf space and `sfB`'s root space may partially overlap. The composition builds
1982: a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
1983: nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
1984: `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes.
1986: .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
1987: @*/
1988: PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
1989: {
1990: const PetscSFNode *remotePointsA, *remotePointsB;
1991: PetscSFNode *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
1992: const PetscInt *localPointsA, *localPointsB;
1993: PetscInt *localPointsBA;
1994: PetscInt i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
1995: PetscBool denseB;
1997: PetscFunctionBegin;
1999: PetscSFCheckGraphSet(sfA, 1);
2001: PetscSFCheckGraphSet(sfB, 2);
2002: PetscCheckSameComm(sfA, 1, sfB, 2);
2003: PetscAssertPointer(sfBA, 3);
2004: PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2005: PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
2007: PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2008: PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2009: /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
2010: numRootsB; otherwise, garbage will be broadcasted.
2011: Example (comm size = 1):
2012: sfA: 0 <- (0, 0)
2013: sfB: 100 <- (0, 0)
2014: 101 <- (0, 1)
2015:      Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target
2016:      of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would
2017:      receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
2018:      remotePointsA; if not recast, point 101 would receive a garbage value. */
2019: PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
2020: for (i = 0; i < numRootsB; i++) {
2021: reorderedRemotePointsA[i].rank = -1;
2022: reorderedRemotePointsA[i].index = -1;
2023: }
2024: for (i = 0; i < numLeavesA; i++) {
2025: PetscInt localp = localPointsA ? localPointsA[i] : i;
2027: if (localp >= numRootsB) continue;
2028: reorderedRemotePointsA[localp] = remotePointsA[i];
2029: }
2030: remotePointsA = reorderedRemotePointsA;
2031: PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2032: PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
2033: for (i = 0; i < maxleaf - minleaf + 1; i++) {
2034: leafdataB[i].rank = -1;
2035: leafdataB[i].index = -1;
2036: }
2037: PetscCall(PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
2038: PetscCall(PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
2039: PetscCall(PetscFree(reorderedRemotePointsA));
2041: denseB = (PetscBool)!localPointsB;
2042: for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2043: if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
2044: else numLeavesBA++;
2045: }
2046: if (denseB) {
2047: localPointsBA = NULL;
2048: remotePointsBA = leafdataB;
2049: } else {
2050: PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
2051: PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
2052: for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2053: const PetscInt l = localPointsB ? localPointsB[i] : i;
2055: if (leafdataB[l - minleaf].rank == -1) continue;
2056: remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
2057: localPointsBA[numLeavesBA] = l;
2058: numLeavesBA++;
2059: }
2060: PetscCall(PetscFree(leafdataB));
2061: }
2062: PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2063: PetscCall(PetscSFSetFromOptions(*sfBA));
2064: PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2065: PetscFunctionReturn(PETSC_SUCCESS);
2066: }
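/*
   A minimal usage sketch (illustrative; `sfA`, `sfB`, `rootdataA`, and `leafdataB` are hypothetical,
   with rootdataA sized for sfA's roots and leafdataB for sfB's leaves): a broadcast on the composed SF
   is equivalent to broadcasting through sfA and then through sfB on the connected nodes.

     PetscSF sfBA;

     PetscCall(PetscSFCompose(sfA, sfB, &sfBA));
     PetscCall(PetscSFBcastBegin(sfBA, MPIU_SCALAR, rootdataA, leafdataB, MPI_REPLACE));
     PetscCall(PetscSFBcastEnd(sfBA, MPIU_SCALAR, rootdataA, leafdataB, MPI_REPLACE));
     PetscCall(PetscSFDestroy(&sfBA));
*/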
2068: /*@
2069: PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one
2071: Input Parameters:
2072: + sfA - The first `PetscSF`
2073: - sfB - The second `PetscSF`
2075: Output Parameter:
2076: . sfBA - The composite `PetscSF`.
2078: Level: developer
2080: Notes:
2081:   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2082:   forests, i.e., the same leaf is not connected to different roots. Moreover, all roots of the
2083:   second `PetscSF` must have a degree of 1, i.e., no root has more than one leaf connected.
2085:   `sfA`'s leaf space and `sfB`'s leaf space may partially overlap. The composition builds
2086: a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
2087: roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()`
2088: on `sfA`, then
2089: a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots.
2091: .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
2092: @*/
2093: PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2094: {
2095: const PetscSFNode *remotePointsA, *remotePointsB;
2096: PetscSFNode *remotePointsBA;
2097: const PetscInt *localPointsA, *localPointsB;
2098: PetscSFNode *reorderedRemotePointsA = NULL;
2099: PetscInt i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
2100: MPI_Op op;
2101: #if defined(PETSC_USE_64BIT_INDICES)
2102: PetscBool iswin;
2103: #endif
2105: PetscFunctionBegin;
2107: PetscSFCheckGraphSet(sfA, 1);
2109: PetscSFCheckGraphSet(sfB, 2);
2110: PetscCheckSameComm(sfA, 1, sfB, 2);
2111: PetscAssertPointer(sfBA, 3);
2112: PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2113: PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
2115: PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2116: PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2118: /* TODO: Check roots of sfB have degree of 1 */
2119:   /* Once that check is implemented, we can replace the MPI_MAXLOC
2120:      with MPI_REPLACE, since the two have the same effect when the root condition holds.
2121:      We use MPI_MAXLOC only to get a deterministic output from this routine if
2122:      the root condition is not met.
2123: */
2124: op = MPI_MAXLOC;
2125: #if defined(PETSC_USE_64BIT_INDICES)
2126: /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2127: PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
2128: if (iswin) op = MPI_REPLACE;
2129: #endif
2131: PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2132: PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
2133: for (i = 0; i < maxleaf - minleaf + 1; i++) {
2134: reorderedRemotePointsA[i].rank = -1;
2135: reorderedRemotePointsA[i].index = -1;
2136: }
2137: if (localPointsA) {
2138: for (i = 0; i < numLeavesA; i++) {
2139: if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2140: reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2141: }
2142: } else {
2143: for (i = 0; i < numLeavesA; i++) {
2144: if (i > maxleaf || i < minleaf) continue;
2145: reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2146: }
2147: }
2149: PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
2150: PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
2151: for (i = 0; i < numRootsB; i++) {
2152: remotePointsBA[i].rank = -1;
2153: remotePointsBA[i].index = -1;
2154: }
2156: PetscCall(PetscSFReduceBegin(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op));
2157: PetscCall(PetscSFReduceEnd(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op));
2158: PetscCall(PetscFree(reorderedRemotePointsA));
2159: for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
2160: if (remotePointsBA[i].rank == -1) continue;
2161: remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
2162: remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2163: localPointsBA[numLeavesBA] = i;
2164: numLeavesBA++;
2165: }
2166: PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2167: PetscCall(PetscSFSetFromOptions(*sfBA));
2168: PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2169: PetscFunctionReturn(PETSC_SUCCESS);
2170: }
2172: /*
2173: PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`
2175: Input Parameter:
2176: . sf - The global `PetscSF`
2178: Output Parameter:
2179: . out - The local `PetscSF`
2181: .seealso: `PetscSF`, `PetscSFCreate()`
2182: */
2183: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2184: {
2185: MPI_Comm comm;
2186: PetscMPIInt myrank;
2187: const PetscInt *ilocal;
2188: const PetscSFNode *iremote;
2189: PetscInt i, j, nroots, nleaves, lnleaves, *lilocal;
2190: PetscSFNode *liremote;
2191: PetscSF lsf;
2193: PetscFunctionBegin;
2195: if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2196: else {
2197: /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2198: PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
2199: PetscCallMPI(MPI_Comm_rank(comm, &myrank));
2201: /* Find out local edges and build a local SF */
2202: PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
2203: for (i = lnleaves = 0; i < nleaves; i++) {
2204: if (iremote[i].rank == (PetscInt)myrank) lnleaves++;
2205: }
2206: PetscCall(PetscMalloc1(lnleaves, &lilocal));
2207: PetscCall(PetscMalloc1(lnleaves, &liremote));
2209: for (i = j = 0; i < nleaves; i++) {
2210: if (iremote[i].rank == (PetscInt)myrank) {
2211: lilocal[j] = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2212: liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
2213: liremote[j].index = iremote[i].index;
2214: j++;
2215: }
2216: }
2217: PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
2218: PetscCall(PetscSFSetFromOptions(lsf));
2219: PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
2220: PetscCall(PetscSFSetUp(lsf));
2221: *out = lsf;
2222: }
2223: PetscFunctionReturn(PETSC_SUCCESS);
2224: }
2226: /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2227: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2228: {
2229: PetscMemType rootmtype, leafmtype;
2231: PetscFunctionBegin;
2233: PetscCall(PetscSFSetUp(sf));
2234: PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
2235: PetscCall(PetscGetMemType(rootdata, &rootmtype));
2236: PetscCall(PetscGetMemType(leafdata, &leafmtype));
2237: PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
2238: PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
2239: PetscFunctionReturn(PETSC_SUCCESS);
2240: }
2242: /*@
2243: PetscSFConcatenate - concatenate multiple `PetscSF` into one
2245: Input Parameters:
2246: + comm - the communicator
2247: . nsfs - the number of input `PetscSF`s
2248: . sfs - the array of input `PetscSF`s
2249: . rootMode - the root mode specifying how roots are handled
2250: - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage
2252: Output Parameter:
2253: . newsf - The resulting `PetscSF`
2255: Level: advanced
2257: Notes:
2258:   The communicator of all `PetscSF`s in `sfs` must be `comm`.
2260: Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.
2262: The offsets in `leafOffsets` are added to the original leaf indices.
2264: If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well.
2265: In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`.
2267: If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s.
2268:   In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2270: All root modes retain the essential connectivity condition.
2271: If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`.
2272: Parameter `rootMode` controls how the input root spaces are combined.
2273: For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode)
2274: and is also the same in the output `PetscSF`.
2275: For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
2276: `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
2277: roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously.
2278: `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
2279: roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously;
2280: the original root ranks are ignored.
2281: For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
2282: the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each rank
2283: to keep the load balancing.
2284: However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks.
2286: Example:
2287: We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running
2288: .vb
2289: make -C $PETSC_DIR/src/vec/is/sf/tests ex18
2290: for m in {local,global,shared}; do
2291: mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view
2292: done
2293: .ve
2294: we generate two identical `PetscSF`s sf_0 and sf_1,
2295: .vb
2296: PetscSF Object: sf_0 2 MPI processes
2297: type: basic
2298: rank #leaves #roots
2299: [ 0] 4 2
2300: [ 1] 4 2
2301: leaves roots roots in global numbering
2302: ( 0, 0) <- ( 0, 0) = 0
2303: ( 0, 1) <- ( 0, 1) = 1
2304: ( 0, 2) <- ( 1, 0) = 2
2305: ( 0, 3) <- ( 1, 1) = 3
2306: ( 1, 0) <- ( 0, 0) = 0
2307: ( 1, 1) <- ( 0, 1) = 1
2308: ( 1, 2) <- ( 1, 0) = 2
2309: ( 1, 3) <- ( 1, 1) = 3
2310: .ve
2311: and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\:
2312: .vb
2313: rootMode = local:
2314: PetscSF Object: result_sf 2 MPI processes
2315: type: basic
2316: rank #leaves #roots
2317: [ 0] 8 4
2318: [ 1] 8 4
2319: leaves roots roots in global numbering
2320: ( 0, 0) <- ( 0, 0) = 0
2321: ( 0, 1) <- ( 0, 1) = 1
2322: ( 0, 2) <- ( 1, 0) = 4
2323: ( 0, 3) <- ( 1, 1) = 5
2324: ( 0, 4) <- ( 0, 2) = 2
2325: ( 0, 5) <- ( 0, 3) = 3
2326: ( 0, 6) <- ( 1, 2) = 6
2327: ( 0, 7) <- ( 1, 3) = 7
2328: ( 1, 0) <- ( 0, 0) = 0
2329: ( 1, 1) <- ( 0, 1) = 1
2330: ( 1, 2) <- ( 1, 0) = 4
2331: ( 1, 3) <- ( 1, 1) = 5
2332: ( 1, 4) <- ( 0, 2) = 2
2333: ( 1, 5) <- ( 0, 3) = 3
2334: ( 1, 6) <- ( 1, 2) = 6
2335: ( 1, 7) <- ( 1, 3) = 7
2337: rootMode = global:
2338: PetscSF Object: result_sf 2 MPI processes
2339: type: basic
2340: rank #leaves #roots
2341: [ 0] 8 4
2342: [ 1] 8 4
2343: leaves roots roots in global numbering
2344: ( 0, 0) <- ( 0, 0) = 0
2345: ( 0, 1) <- ( 0, 1) = 1
2346: ( 0, 2) <- ( 0, 2) = 2
2347: ( 0, 3) <- ( 0, 3) = 3
2348: ( 0, 4) <- ( 1, 0) = 4
2349: ( 0, 5) <- ( 1, 1) = 5
2350: ( 0, 6) <- ( 1, 2) = 6
2351: ( 0, 7) <- ( 1, 3) = 7
2352: ( 1, 0) <- ( 0, 0) = 0
2353: ( 1, 1) <- ( 0, 1) = 1
2354: ( 1, 2) <- ( 0, 2) = 2
2355: ( 1, 3) <- ( 0, 3) = 3
2356: ( 1, 4) <- ( 1, 0) = 4
2357: ( 1, 5) <- ( 1, 1) = 5
2358: ( 1, 6) <- ( 1, 2) = 6
2359: ( 1, 7) <- ( 1, 3) = 7
2361: rootMode = shared:
2362: PetscSF Object: result_sf 2 MPI processes
2363: type: basic
2364: rank #leaves #roots
2365: [ 0] 8 2
2366: [ 1] 8 2
2367: leaves roots roots in global numbering
2368: ( 0, 0) <- ( 0, 0) = 0
2369: ( 0, 1) <- ( 0, 1) = 1
2370: ( 0, 2) <- ( 1, 0) = 2
2371: ( 0, 3) <- ( 1, 1) = 3
2372: ( 0, 4) <- ( 0, 0) = 0
2373: ( 0, 5) <- ( 0, 1) = 1
2374: ( 0, 6) <- ( 1, 0) = 2
2375: ( 0, 7) <- ( 1, 1) = 3
2376: ( 1, 0) <- ( 0, 0) = 0
2377: ( 1, 1) <- ( 0, 1) = 1
2378: ( 1, 2) <- ( 1, 0) = 2
2379: ( 1, 3) <- ( 1, 1) = 3
2380: ( 1, 4) <- ( 0, 0) = 0
2381: ( 1, 5) <- ( 0, 1) = 1
2382: ( 1, 6) <- ( 1, 0) = 2
2383: ( 1, 7) <- ( 1, 1) = 3
2384: .ve
2386: .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode`
2387: @*/
2388: PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf)
2389: {
2390: PetscInt i, s, nLeaves, nRoots;
2391: PetscInt *leafArrayOffsets;
2392: PetscInt *ilocal_new;
2393: PetscSFNode *iremote_new;
2394: PetscBool all_ilocal_null = PETSC_FALSE;
2395: PetscLayout glayout = NULL;
2396: PetscInt *gremote = NULL;
2397: PetscMPIInt rank, size;
2399: PetscFunctionBegin;
2400: if (PetscDefined(USE_DEBUG)) {
2401: PetscSF dummy; /* just to have a PetscObject on comm for input validation */
2403: PetscCall(PetscSFCreate(comm, &dummy));
2405: PetscAssertPointer(sfs, 3);
2406: for (i = 0; i < nsfs; i++) {
2408: PetscCheckSameComm(dummy, 1, sfs[i], 3);
2409: }
2411: if (leafOffsets) PetscAssertPointer(leafOffsets, 5);
2412: PetscAssertPointer(newsf, 6);
2413: PetscCall(PetscSFDestroy(&dummy));
2414: }
2415: if (!nsfs) {
2416: PetscCall(PetscSFCreate(comm, newsf));
2417: PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
2418: PetscFunctionReturn(PETSC_SUCCESS);
2419: }
2420: PetscCallMPI(MPI_Comm_rank(comm, &rank));
2421: PetscCallMPI(MPI_Comm_size(comm, &size));
2423: /* Calculate leaf array offsets */
2424: PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
2425: leafArrayOffsets[0] = 0;
2426: for (s = 0; s < nsfs; s++) {
2427: PetscInt nl;
2429: PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
2430: leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2431: }
2432: nLeaves = leafArrayOffsets[nsfs];
2434: /* Calculate number of roots */
2435: switch (rootMode) {
2436: case PETSCSF_CONCATENATE_ROOTMODE_SHARED: {
2437: PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
2438: if (PetscDefined(USE_DEBUG)) {
2439: for (s = 1; s < nsfs; s++) {
2440: PetscInt nr;
2442: PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
2443: PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots);
2444: }
2445: }
2446: } break;
2447: case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: {
2448: /* Calculate also global layout in this case */
2449: PetscInt *nls;
2450: PetscLayout *lts;
2451: PetscInt **inds;
2452: PetscInt j;
2453: PetscInt rootOffset = 0;
2455: PetscCall(PetscCalloc3(nsfs, <s, nsfs, &nls, nsfs, &inds));
2456: PetscCall(PetscLayoutCreate(comm, &glayout));
2457: glayout->bs = 1;
2458: glayout->n = 0;
2459: glayout->N = 0;
2460: for (s = 0; s < nsfs; s++) {
2461: PetscCall(PetscSFGetGraphLayout(sfs[s], <s[s], &nls[s], NULL, &inds[s]));
2462: glayout->n += lts[s]->n;
2463: glayout->N += lts[s]->N;
2464: }
2465: PetscCall(PetscLayoutSetUp(glayout));
2466: PetscCall(PetscMalloc1(nLeaves, &gremote));
2467: for (s = 0, j = 0; s < nsfs; s++) {
2468: for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset;
2469: rootOffset += lts[s]->N;
2470: PetscCall(PetscLayoutDestroy(<s[s]));
2471: PetscCall(PetscFree(inds[s]));
2472: }
2473: PetscCall(PetscFree3(lts, nls, inds));
2474: nRoots = glayout->N;
2475: } break;
2476: case PETSCSF_CONCATENATE_ROOTMODE_LOCAL:
2477: /* nRoots calculated later in this case */
2478: break;
2479: default:
2480: SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode);
2481: }
2483: if (!leafOffsets) {
2484: all_ilocal_null = PETSC_TRUE;
2485: for (s = 0; s < nsfs; s++) {
2486: const PetscInt *ilocal;
2488: PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
2489: if (ilocal) {
2490: all_ilocal_null = PETSC_FALSE;
2491: break;
2492: }
2493: }
2494: PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
2495: }
2497: /* Renumber and concatenate local leaves */
2498: ilocal_new = NULL;
2499: if (!all_ilocal_null) {
2500: PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
2501: for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2502: for (s = 0; s < nsfs; s++) {
2503: const PetscInt *ilocal;
2504: PetscInt *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
2505: PetscInt i, nleaves_l;
2507: PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
2508: for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2509: }
2510: }
2512: /* Renumber and concatenate remote roots */
2513: if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) {
2514: PetscInt rootOffset = 0;
2516: PetscCall(PetscMalloc1(nLeaves, &iremote_new));
2517: for (i = 0; i < nLeaves; i++) {
2518: iremote_new[i].rank = -1;
2519: iremote_new[i].index = -1;
2520: }
2521: for (s = 0; s < nsfs; s++) {
2522: PetscInt i, nl, nr;
2523: PetscSF tmp_sf;
2524: const PetscSFNode *iremote;
2525: PetscSFNode *tmp_rootdata;
2526: PetscSFNode *tmp_leafdata = &iremote_new[leafArrayOffsets[s]];
2528: PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
2529: PetscCall(PetscSFCreate(comm, &tmp_sf));
2530: /* create helper SF with contiguous leaves */
2531: PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
2532: PetscCall(PetscSFSetUp(tmp_sf));
2533: PetscCall(PetscMalloc1(nr, &tmp_rootdata));
2534: if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) {
2535: for (i = 0; i < nr; i++) {
2536: tmp_rootdata[i].index = i + rootOffset;
2537: tmp_rootdata[i].rank = (PetscInt)rank;
2538: }
2539: rootOffset += nr;
2540: } else {
2541: for (i = 0; i < nr; i++) {
2542: tmp_rootdata[i].index = i;
2543: tmp_rootdata[i].rank = (PetscInt)rank;
2544: }
2545: }
2546: PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2547: PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2548: PetscCall(PetscSFDestroy(&tmp_sf));
2549: PetscCall(PetscFree(tmp_rootdata));
2550: }
2551: if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above
2553: /* Build the new SF */
2554: PetscCall(PetscSFCreate(comm, newsf));
2555: PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
2556: } else {
2557: /* Build the new SF */
2558: PetscCall(PetscSFCreate(comm, newsf));
2559: PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote));
2560: }
2561: PetscCall(PetscSFSetUp(*newsf));
2562: PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view"));
2563: PetscCall(PetscLayoutDestroy(&glayout));
2564: PetscCall(PetscFree(gremote));
2565: PetscCall(PetscFree(leafArrayOffsets));
2566: PetscFunctionReturn(PETSC_SUCCESS);
2567: }
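/*
   A minimal usage sketch (illustrative; `comm`, `sf0`, and `sf1` are hypothetical, with both input SFs
   living on comm and using contiguous leaf storage, i.e. ilocal = NULL, so leafOffsets may be NULL):
   concatenate two star forests while joining their root spaces locally.

     PetscSF sfs[2] = {sf0, sf1};
     PetscSF newsf;

     PetscCall(PetscSFConcatenate(comm, 2, sfs, PETSCSF_CONCATENATE_ROOTMODE_LOCAL, NULL, &newsf));
     PetscCall(PetscSFDestroy(&newsf));
*/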