/* mspart.c: multistage graph partitioner implementation */
1: #include <petscsf.h>
2: #include <petscdmplex.h>
3: #include <petsc/private/dmimpl.h>
4: #include <petsc/private/dmpleximpl.h>
5: #include <petsc/private/partitionerimpl.h>
/* Citation registered once per program run (see PetscCitationsRegister() in the create routine) */
PetscBool MSPartitionerCite = PETSC_FALSE;
const char MSPartitionerCitation[] = "@article{PETScMSPart2021,\n"
" author = {Parsani, Matteo and Boukharfane, Radouan and Nolasco, Irving Reyna and Fern{\'a}ndez, David C Del Rey and Zampini, Stefano and Hadri, Bilel and Dalcin, Lisandro},\n"
" title = {High-order accurate entropy-stable discontinuous collocated Galerkin methods with the summation-by-parts property for compressible {CFD} frameworks: Scalable {SSDC} algorithms and flow solver},\n"
" journal = {Journal of Computational Physics},\n"
" volume = {424},\n"
" pages = {109844},\n"
" year = {2021}\n"
"}\n";

/* Log events: one for stage setup, one per partitioning stage (capped at PETSCPARTITIONER_MS_NUMSTAGE) */
PetscLogEvent PetscPartitioner_MS_SetUp;
PetscLogEvent PetscPartitioner_MS_Stage[PETSCPARTITIONER_MS_NUMSTAGE];
/* Private data for the multistage partitioner: a hierarchy of MPI groups, one
   stage partitioner per level, and optional target partition weights */
typedef struct {
  PetscInt levels;    /* number of stages in the hierarchy */
  MPI_Group *lgroup;  /* one MPI group per stage, in the numbering of the partitioner's communicator */
  /* Need access to the DM in inner stages */
  PetscInt stage;     /* stage currently being partitioned */
  DM stagedm;         /* DM used by the current stage (referenced, see SetStage) */
  /* Stage partitioners */
  PetscPartitioner *ppart; /* one inner partitioner per stage (NULL on non-participating ranks) */
  /* Diagnostic */
  PetscInt *edgeCut;  /* per-stage edge-cut, accumulated in the partition routine */
  /* Target partition weights */
  PetscBool view_tpwgs;    /* view the weight sections in the viewer routine */
  PetscSection *tpwgs;     /* per-stage target partition weights (one section per stage) */
  PetscBool compute_tpwgs; /* compute the weights during SetStages (default PETSC_TRUE) */
} PetscPartitioner_MS;

/* Strings for PetscPartitionerMultistageStrategy, consumed by PetscOptionsEnum() */
const char *const PetscPartitionerMultistageStrategyList[] = {"NODE", "MSECTION", "PetscPartitionerMultistageStrategy", "PETSCPARTITIONER_MS_", NULL};
38: static void PetscLCM_Local(void *in, void *out, PetscMPIInt *cnt, MPI_Datatype *datatype)
39: {
40: PetscInt count = *cnt;
41: PetscInt *xin = (PetscInt *)in, *xout = (PetscInt *)out;
43: PetscFunctionBegin;
44: if (*datatype != MPIU_INT) {
45: (void)(*PetscErrorPrintf)("Can only handle MPIU_INT");
46: PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
47: }
48: for (PetscInt i = 0; i < count; i++) xout[i] = PetscLCM(xin[i], xout[i]);
49: PetscFunctionReturnVoid();
50: }
/* Viewer for the multistage partitioner.
   Prints one line per stage (stage 0 is the internode stage) and, with
   PETSC_VIEWER_ASCII_INFO_DETAIL, the participating ranks and the inner
   partitioner of each stage via subviewers.
   NOTE: this routine is collective on the partitioner's communicator; the
   MPI/IS calls below must execute in the same order on every rank. */
static PetscErrorCode PetscPartitionerView_Multistage(PetscPartitioner part, PetscViewer viewer)
{
  PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;
  PetscViewerFormat format;
  PetscBool iascii;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (iascii) {
    MPI_Comm comm;
    MPI_Group group;

    PetscCall(PetscViewerASCIIPushTab(viewer));
    /* header printed once; when a stage DM is set, only the active stage reports */
    if (!p->stagedm || p->stage == 0) PetscCall(PetscViewerASCIIPrintf(viewer, "Multistage graph partitioner: total stages %" PetscInt_FMT "\n", p->levels));
    comm = PetscObjectComm((PetscObject)part);
    PetscCallMPI(MPI_Comm_group(comm, &group));
    for (PetscInt l = 0; l < p->levels; l++) {
      PetscPartitioner ppart = p->ppart[l];
      MPI_Comm pcomm = PetscObjectComm((PetscObject)ppart);
      MPI_Group pgroup = MPI_GROUP_EMPTY;

      PetscCall(PetscViewerASCIIPushTab(viewer));
      if (l) {
        if (pcomm != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_group(pcomm, &pgroup));
      } else {
        /* stage 0 group is stored directly; do not free it below */
        pgroup = p->lgroup[0];
      }

      if (l) {
        IS is;
        PetscMPIInt gr, lem, gem = 0;
        PetscInt uniq;

        /* count the distinct inner-stage groups: each participating rank
           contributes the global rank of its group's root, then ISRenumber
           counts the unique values; lem flags non-participating (empty) ranks */
        lem = 1;
        PetscCallMPI(MPI_Group_size(group, &gr));
        if (pgroup != MPI_GROUP_EMPTY) {
          lem = 0;
          PetscCallMPI(MPI_Group_translate_ranks(pgroup, 1, &lem, group, &gr));
        }
        PetscCall(ISCreateStride(PetscObjectComm((PetscObject)part), 1, gr, 1, &is));
        PetscCall(ISRenumber(is, NULL, &uniq, NULL));
        PetscCall(ISDestroy(&is));
        PetscCallMPI(MPIU_Allreduce(&lem, &gem, 1, MPI_INT, MPI_SUM, PetscObjectComm((PetscObject)part)));
        if (gem) uniq--; /* the sentinel value contributed by empty ranks is not a real group */
        if (!p->stagedm || l == p->stage) PetscCall(PetscViewerASCIIPrintf(viewer, "Stage %" PetscInt_FMT " partitioners (%" PetscInt_FMT " unique groups, %d empty processes)\n", l, uniq, gem));
      } else {
        PetscMPIInt psize;
        PetscCallMPI(MPI_Group_size(pgroup, &psize));
        if (!p->stagedm || l == p->stage) PetscCall(PetscViewerASCIIPrintf(viewer, "Stage %" PetscInt_FMT " partitioner to %d processes\n", l, psize));
      }
      PetscCall(PetscViewerFlush(viewer));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL && (!p->stagedm || l == p->stage)) {
        PetscViewer pviewer;

        if (pcomm == MPI_COMM_NULL) pcomm = PETSC_COMM_SELF;
        PetscCall(PetscViewerGetSubViewer(viewer, pcomm, &pviewer));
        if (ppart) {
          PetscMPIInt size, *ranks, *granks;
          char tstr[16], strranks[3072]; /* I'm lazy: max 12 chars (including spaces) per rank -> 256 ranks max*/

          /* translate the stage group's ranks to global ranks for display */
          PetscCallMPI(MPI_Group_size(pgroup, &size));
          PetscCall(PetscMalloc2(size, &ranks, size, &granks));
          for (PetscInt i = 0; i < size; i++) ranks[i] = i;
          PetscCallMPI(MPI_Group_translate_ranks(pgroup, size, ranks, group, granks));
          if (size <= 256) {
            PetscCall(PetscStrncpy(strranks, "", sizeof(strranks)));
            for (PetscInt i = 0; i < size; i++) {
              PetscCall(PetscSNPrintf(tstr, sizeof(tstr), " %d", granks[i]));
              PetscCall(PetscStrlcat(strranks, tstr, sizeof(strranks)));
            }
          } else {
            PetscCall(PetscStrncpy(strranks, " not showing > 256", sizeof(strranks))); /* LCOV_EXCL_LINE */
          }
          PetscCall(PetscFree2(ranks, granks));
          if (!l) {
            PetscCall(PetscViewerASCIIPrintf(pviewer, "Destination ranks:%s\n", strranks));
            PetscCall(PetscPartitionerView(ppart, pviewer));
          } else {
            PetscCall(PetscPartitionerView(ppart, pviewer));
            PetscCall(PetscViewerASCIIPrintf(pviewer, " Participating ranks:%s\n", strranks));
          }
          if (p->view_tpwgs) PetscCall(PetscSectionView(p->tpwgs[l], pviewer));
        }
        PetscCall(PetscViewerRestoreSubViewer(viewer, pcomm, &pviewer));
      }
      PetscCall(PetscViewerFlush(viewer));

      /* pgroup for l == 0 aliases p->lgroup[0] and is owned by the partitioner */
      if (l && pgroup != MPI_GROUP_EMPTY) PetscCallMPI(MPI_Group_free(&pgroup));
    }
    for (PetscInt l = 0; l < p->levels; l++) PetscCall(PetscViewerASCIIPopTab(viewer));
    PetscCall(PetscViewerASCIIPopTab(viewer));
    PetscCallMPI(MPI_Group_free(&group));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Build the per-stage MPI groups for a given strategy.
   options (may be NULL): [0] strategy, [1] node size (NODE) or split factor (MSECTION),
   [2] interleaved flag (NODE only).
   On output *levels holds *nlevels groups, expressed in the rank numbering of comm;
   the caller owns the array and the groups (see PetscPartitionerMultistage_DestroyStages).
   Collective on comm. */
static PetscErrorCode PetscPartitionerMultistage_CreateStages(MPI_Comm comm, const PetscInt *options, PetscInt *nlevels, MPI_Group *levels[])
{
  MPI_Comm ncomm;
  MPI_Group *lgroup;
  MPI_Group ggroup, ngroup;
  PetscMPIInt *ranks, *granks;
  PetscMPIInt size, nsize, isize, rank, nrank, i, l, n, m;
  PetscInt strategy = options ? options[0] : (PetscInt)PETSCPARTITIONER_MS_STRATEGY_NODE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* trivial case: a single process is a single one-group stage */
  if (size == 1) {
    PetscCall(PetscMalloc1(1, &lgroup));
    PetscCallMPI(MPI_Comm_group(comm, &lgroup[0]));
    *nlevels = 1;
    *levels = lgroup;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  switch (strategy) {
  case PETSCPARTITIONER_MS_STRATEGY_NODE:
    /* create groups (in global rank ordering of comm) for the 2-level partitioner */
    if (options && options[1] > 0) {
      /* user-specified node size: split comm into blocks (or round-robin when interleaved) */
      PetscMPIInt node_size = (PetscMPIInt)options[1];
      if (node_size > 1) {
        PetscMPIInt color;
        if (options[2]) { /* interleaved */
          PetscMPIInt ngroups = size / node_size;

          if (size % node_size) ngroups += 1;
          color = rank % ngroups;
        } else {
          color = rank / node_size;
        }
        PetscCallMPI(MPI_Comm_split(comm, color, rank, &ncomm));
      } else {
        PetscCallMPI(MPI_Comm_dup(comm, &ncomm));
      }
    } else {
#if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
      /* automatic node discovery via shared-memory domains */
      PetscCallMPI(MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, rank, MPI_INFO_NULL, &ncomm));
#else
      /* if users do not specify the node size and MPI_Comm_split_type is not available, defaults to same comm */
      PetscCallMPI(MPI_Comm_dup(comm, &ncomm));
#endif
    }

    PetscCallMPI(MPI_Comm_size(ncomm, &nsize));
    if (nsize == size) { /* one node */
      PetscCall(PetscMalloc1(1, &lgroup));
      PetscCallMPI(MPI_Comm_group(ncomm, &lgroup[0]));
      PetscCallMPI(MPI_Comm_free(&ncomm));

      *nlevels = 1;
      *levels = lgroup;
      break;
    }

    PetscCall(PetscMalloc1(2, &lgroup));

    /* intranode group (in terms of global rank) */
    PetscCall(PetscMalloc2(size, &ranks, nsize, &granks));
    for (i = 0; i < nsize; i++) ranks[i] = i;
    PetscCallMPI(MPI_Comm_group(comm, &ggroup));
    PetscCallMPI(MPI_Comm_group(ncomm, &ngroup));
    PetscCallMPI(MPI_Group_translate_ranks(ngroup, nsize, ranks, ggroup, granks));
    PetscCallMPI(MPI_Group_incl(ggroup, nsize, granks, &lgroup[1]));

    /* internode group (master processes on the nodes only)
       this group must be specified by all processes in the comm
       we need to gather those master ranks */
    PetscCallMPI(MPI_Group_rank(ngroup, &nrank));
    granks[0] = !nrank ? rank : -1; /* -1 marks non-master ranks, compacted away below */
    PetscCallMPI(MPI_Allgather(granks, 1, MPI_INT, ranks, 1, MPI_INT, comm));
    for (i = 0, isize = 0; i < size; i++)
      if (ranks[i] >= 0) ranks[isize++] = ranks[i];
    PetscCallMPI(MPI_Group_incl(ggroup, isize, ranks, &lgroup[0]));

    PetscCall(PetscFree2(ranks, granks));
    PetscCallMPI(MPI_Group_free(&ggroup));
    PetscCallMPI(MPI_Group_free(&ngroup));
    PetscCallMPI(MPI_Comm_free(&ncomm));

    *nlevels = 2;
    *levels = lgroup;
    break;

  case PETSCPARTITIONER_MS_STRATEGY_MSECTION:
    /* recursive m-section (m=2 bisection) */
    m = options ? (PetscMPIInt)options[1] : 2;
    PetscCheck(m >= 2, comm, PETSC_ERR_SUP, "Invalid split %d, must be greater than one", m);
    /* find largest power l with n = m^l <= size */
    l = 0;
    n = 1;
    while (n <= size) {
      l++;
      n *= m;
    }
    l -= 1;
    n /= m;

    PetscCheck(l != 0, comm, PETSC_ERR_SUP, "Invalid split %d with communicator size %d", m, size);
    PetscCall(PetscMalloc1(l, &lgroup));
    for (i = 1; i < l; i++) lgroup[i] = MPI_GROUP_EMPTY;

    if (l > 1) {
      /* stage 0 takes the m section anchors plus all the leftover ranks >= n */
      PetscMPIInt rem = size - n;
      PetscCall(PetscMalloc1((m + rem), &ranks));
      for (i = 0; i < m; i++) ranks[i] = i * (n / m);
      for (i = 0; i < rem; i++) ranks[i + m] = n + i;
      PetscCallMPI(MPI_Comm_group(comm, &ggroup));
      PetscCallMPI(MPI_Group_incl(ggroup, m + rem, ranks, &lgroup[0]));
      if (rank < n) {
        /* deeper stages: each anchor rank joins a group of m ranks spaced by inc */
        for (i = 1; i < l; i++) {
          PetscMPIInt inc = (PetscMPIInt)PetscPowInt(m, l - i - 1);
          if (rank % inc == 0) {
            PetscMPIInt anch = (rank - rank % (inc * m)), r;
            for (r = 0; r < m; r++) ranks[r] = anch + r * inc;
            PetscCallMPI(MPI_Group_incl(ggroup, m, ranks, &lgroup[i]));
          }
        }
      }
      PetscCall(PetscFree(ranks));
      PetscCallMPI(MPI_Group_free(&ggroup));
    } else {
      PetscCallMPI(MPI_Comm_group(comm, &lgroup[0]));
    }

    *nlevels = l;
    *levels = lgroup;
    break;

  default:
    SETERRQ(comm, PETSC_ERR_SUP, "Not implemented"); /* LCOV_EXCL_LINE */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
288: static PetscErrorCode PetscPartitionerMultistage_DestroyStages(PetscInt nstages, MPI_Group *groups[])
289: {
290: PetscFunctionBegin;
291: for (PetscInt l = 0; l < nstages; l++) {
292: MPI_Group group = (*groups)[l];
293: if (group != MPI_GROUP_EMPTY) PetscCallMPI(MPI_Group_free(&group));
294: }
295: PetscCall(PetscFree(*groups));
296: PetscFunctionReturn(PETSC_SUCCESS);
297: }
/* Release all per-stage state (groups, inner partitioners, weight sections,
   edge-cut array) and reset the stage count to zero.
   NOTE(review): p->stagedm (referenced in SetStage) is not released here —
   presumably cleared by the caller via SetStage with a NULL DM; verify no
   reference is leaked on destroy. */
static PetscErrorCode PetscPartitionerReset_Multistage(PetscPartitioner part)
{
  PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;
  PetscInt nstages = p->levels;

  PetscFunctionBegin;
  p->levels = 0; /* zero first so the object is consistent even if a destroy below fails */
  PetscCall(PetscPartitionerMultistage_DestroyStages(nstages, &p->lgroup));
  for (PetscInt l = 0; l < nstages; l++) {
    PetscCall(PetscPartitionerDestroy(&p->ppart[l]));
    PetscCall(PetscSectionDestroy(&p->tpwgs[l]));
  }
  PetscCall(PetscFree(p->ppart));
  PetscCall(PetscFree(p->tpwgs));
  PetscCall(PetscFree(p->edgeCut));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Destroy hook: reset all stage state, then free the private data struct */
static PetscErrorCode PetscPartitionerDestroy_Multistage(PetscPartitioner part)
{
  PetscFunctionBegin;
  PetscCall(PetscPartitionerReset_Multistage(part));
  PetscCall(PetscFree(part->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
325: /*@
326: PetscPartitionerMultistageSetStages - Sets stages for the partitioning
328: Collective
330: Input Parameters:
331: + part - the `PetscPartitioner` object.
332: . levels - the number of stages.
333: - lgroup - array of `MPI_Group`s for each stage.
335: Level: advanced
337: Notes:
338: `MPI_Comm_create(comm, lgroup[l], &lcomm)` is used to compute the communicator for the stage partitioner at level `l`.
339: The groups must be specified in the process numbering of the partitioner communicator.
340: `lgroup[0]` must be collectively specified and it must represent a proper subset of the communicator associated with the original partitioner.
  For each level, a rank may be listed in at most one group (but the same rank may appear at several levels).
343: .seealso: `PetscPartitionerSetType()`, `PetscPartitionerDestroy()`, `PETSCPARTITIONERMULTISTAGE`
344: @*/
PetscErrorCode PetscPartitionerMultistageSetStages(PetscPartitioner part, PetscInt levels, MPI_Group lgroup[])
{
  PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;
  MPI_Comm comm;
  MPI_Group wgroup;
  PetscMPIInt **lparts = NULL; /* per-level, per-rank partition counts (only with compute_tpwgs) */
  PetscMPIInt rank, size;
  PetscBool touched, isms;

  PetscFunctionBegin;
  PetscCheck(levels >= 0, PetscObjectComm((PetscObject)part), PETSC_ERR_ARG_OUTOFRANGE, "Number of levels must be non-negative");
  if (levels) PetscAssertPointer(lgroup, 3);
  PetscCall(PetscObjectTypeCompare((PetscObject)part, PETSCPARTITIONERMULTISTAGE, &isms));
  if (!isms) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogEventBegin(PetscPartitioner_MS_SetUp, part, 0, 0, 0));

  /* drop any previously configured stages */
  PetscCall(PetscPartitionerReset_Multistage(part));

  PetscCall(PetscObjectGetComm((PetscObject)part, &comm));
  PetscCallMPI(MPI_Comm_group(comm, &wgroup));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  p->levels = levels;
  PetscCall(PetscCalloc1(p->levels, &p->ppart));
  PetscCall(PetscCalloc1(p->levels, &p->lgroup));
  PetscCall(PetscCalloc1(p->levels, &p->tpwgs));
  PetscCall(PetscCalloc1(p->levels, &p->edgeCut));

  /* support for target partition weights */
  touched = PETSC_FALSE; /* becomes true once this rank participates at some level */
  if (p->compute_tpwgs) PetscCall(PetscMalloc1(p->levels, &lparts));

  for (PetscInt l = 0; l < p->levels; l++) {
    const char *prefix;
    char aprefix[256];
    MPI_Comm lcomm;

    if (l) { /* let MPI complain/hang if the user did not specify the groups properly */
      PetscCallMPI(MPI_Comm_create(comm, lgroup[l], &lcomm));
    } else { /* in debug mode, we check that the initial group must be consistently (collectively) specified on comm */
#if defined(PETSC_USE_DEBUG)
      MPI_Group group, igroup = lgroup[0];
      PetscMPIInt *ranks, *granks;
      PetscMPIInt b[2], b2[2], csize, gsize;

      /* all ranks must agree on the group size... */
      PetscCallMPI(MPI_Group_size(igroup, &gsize));
      b[0] = -gsize;
      b[1] = +gsize;
      PetscCallMPI(MPIU_Allreduce(b, b2, 2, MPI_INT, MPI_MAX, comm));
      PetscCheck(-b2[0] == b2[1], comm, PETSC_ERR_ARG_WRONG, "Initial group must be collectively specified");
      /* ...and on the translated ranks themselves (gathered from every rank and compared) */
      PetscCallMPI(MPI_Comm_group(comm, &group));
      PetscCallMPI(MPI_Group_size(group, &csize));
      PetscCall(PetscMalloc2(gsize, &ranks, (csize * gsize), &granks));
      for (PetscMPIInt i = 0; i < gsize; i++) granks[i] = i;
      PetscCallMPI(MPI_Group_translate_ranks(igroup, gsize, granks, group, ranks));
      PetscCallMPI(MPI_Group_free(&group));
      PetscCallMPI(MPI_Allgather(ranks, gsize, MPI_INT, granks, gsize, MPI_INT, comm));
      for (PetscMPIInt i = 0; i < gsize; i++) {
        PetscMPIInt chkr = granks[i];
        for (PetscMPIInt j = 0; j < csize; j++) {
          PetscMPIInt shft = j * gsize + i;
          PetscCheck(chkr == granks[shft], comm, PETSC_ERR_ARG_WRONG, "Initial group must be collectively specified");
        }
      }
      PetscCall(PetscFree2(ranks, granks));
#endif
      lcomm = comm; /* stage 0 runs on the full communicator */
    }
    PetscCall(PetscObjectGetOptionsPrefix((PetscObject)part, &prefix));
    if (lcomm != MPI_COMM_NULL) {
      /* MPI_Group_dup */
      PetscCallMPI(MPI_Group_union(lgroup[l], MPI_GROUP_EMPTY, &p->lgroup[l]));
      /* the inner partitioner inherits the options prefix plus a per-level suffix */
      PetscCall(PetscPartitionerCreate(lcomm, &p->ppart[l]));
      PetscCall(PetscObjectSetOptionsPrefix((PetscObject)p->ppart[l], prefix));
      PetscCall(PetscSNPrintf(aprefix, sizeof(aprefix), "petscpartitioner_multistage_levels_%" PetscInt_FMT "_", l));
      PetscCall(PetscObjectAppendOptionsPrefix((PetscObject)p->ppart[l], aprefix));
    } else {
      PetscCheck(l != 0, PetscObjectComm((PetscObject)part), PETSC_ERR_USER, "Invalid first group");
      p->lgroup[l] = MPI_GROUP_EMPTY;
    }
    if (lcomm != comm && lcomm != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&lcomm));

    /* compute number of partitions per level and detect if a process is part of the process (at any level) */
    if (p->compute_tpwgs) {
      PetscMPIInt gsize;
      PetscCall(PetscMalloc1(size, &lparts[l]));
      PetscCallMPI(MPI_Group_size(p->lgroup[l], &gsize));
      if (!l) {
        /* stage 0: only ranks actually listed in the group count as participating */
        PetscMPIInt tr;
        PetscCallMPI(MPI_Group_translate_ranks(wgroup, 1, &rank, p->lgroup[0], &tr));
        if (tr == MPI_UNDEFINED) gsize = 0;
      }
      if (touched && !gsize) gsize = 1; /* a rank touched earlier still carries one partition */
      PetscCallMPI(MPI_Allgather(&gsize, 1, MPI_INT, lparts[l], 1, MPI_INT, comm));
      if (lparts[l][rank]) touched = PETSC_TRUE;
    }
  }

  /* determine weights (bottom-up) */
  if (p->compute_tpwgs) {
    PetscMPIInt *tranks;
    PetscInt *lwgts, wgt;
    MPI_Op MPIU_LCM; /* XXX this is actually recreated at every setup */

    PetscCall(PetscMalloc1((2 * size), &tranks));

    /* we need to compute the least common multiple across processes */
    PetscCallMPI(MPI_Op_create(PetscLCM_Local, 1, &MPIU_LCM));

    /* final target has to have all ones as weights (if the process gets touched) */
    wgt = touched ? 1 : 0;
    PetscCall(PetscMalloc1(size, &lwgts));
    PetscCallMPI(MPI_Allgather(&wgt, 1, MPIU_INT, lwgts, 1, MPIU_INT, comm));

    /* now go up the hierarchy and populate the PetscSection describing the partition weights */
    for (PetscInt l = p->levels - 1; l >= 0; l--) {
      MPI_Comm pcomm;
      MPI_Group igroup;
      PetscMPIInt isize, isizer = 0;
      PetscInt a, b, wgtr;
      PetscMPIInt gsize;
      PetscBool usep = PETSC_FALSE;

      if (p->ppart[l]) {
        PetscCall(PetscObjectGetComm((PetscObject)p->ppart[l], &pcomm));
      } else {
        pcomm = PETSC_COMM_SELF;
      }

      PetscCallMPI(MPI_Group_size(p->lgroup[l], &gsize));
      if (gsize) {
        usep = PETSC_TRUE;
        /* map the level group ranks to global ranks to index lwgts below */
        for (PetscMPIInt i = 0; i < gsize; i++) tranks[i] = i;
        PetscCallMPI(MPI_Group_translate_ranks(p->lgroup[l], gsize, tranks, wgroup, tranks + size));
      } else gsize = lparts[l][rank];
      PetscCall(PetscFree(lparts[l]));
      PetscCall(PetscSectionCreate(pcomm, &p->tpwgs[l]));
      PetscCall(PetscObjectSetName((PetscObject)p->tpwgs[l], "Target partition weights"));
      PetscCall(PetscSectionSetChart(p->tpwgs[l], 0, gsize));

      if (usep) {
        PetscMPIInt *tr = tranks + size;
        for (PetscMPIInt i = 0; i < gsize; i++) PetscCall(PetscSectionSetDof(p->tpwgs[l], i, lwgts[tr[i]]));
      } else if (gsize) {
        PetscCall(PetscSectionSetDof(p->tpwgs[l], 0, 1));
      }
      PetscCall(PetscSectionSetUp(p->tpwgs[l]));
      if (!l) break; /* stage 0 has no parent level to propagate to */

      /* determine number of processes shared by two consecutive levels */
      PetscCallMPI(MPI_Group_intersection(p->lgroup[l], p->lgroup[l - 1], &igroup));
      PetscCallMPI(MPI_Group_size(igroup, &isize));
      PetscCallMPI(MPI_Group_free(&igroup));
      if (!p->ppart[l] && touched) isize = 1; /* if this process gets touched, needs to propagate its data from one level to the other */

      /* reduce on level partitioner comm the size of the max size of the igroup */
      PetscCallMPI(MPIU_Allreduce(&isize, &isizer, 1, MPI_INT, MPI_MAX, pcomm));

      /* sum previously computed partition weights on the level comm */
      wgt = lwgts[rank];
      wgtr = wgt;
      PetscCallMPI(MPIU_Allreduce(&wgt, &wgtr, 1, MPIU_INT, MPI_SUM, pcomm));

      /* partition weights are given with integers; to properly compute these and be able to propagate them to the next level,
         we need to compute the least common multiple of isizer across the global comm */
      a = isizer ? isizer : 1;
      b = a;
      PetscCallMPI(MPIU_Allreduce(&a, &b, 1, MPIU_INT, MPIU_LCM, comm));

      /* finally share this process weight with all the other processes */
      wgt = isizer ? (b * wgtr) / isizer : 0;
      PetscCallMPI(MPI_Allgather(&wgt, 1, MPIU_INT, lwgts, 1, MPIU_INT, comm));
    }
    PetscCall(PetscFree(lwgts));
    PetscCall(PetscFree(tranks));
    PetscCall(PetscFree(lparts));
    PetscCallMPI(MPI_Op_free(&MPIU_LCM));
  }
  PetscCallMPI(MPI_Group_free(&wgroup));
  PetscCall(PetscLogEventEnd(PetscPartitioner_MS_SetUp, part, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
531: PetscErrorCode PetscPartitionerMultistageGetStages_Multistage(PetscPartitioner part, PetscInt *levels, MPI_Group *lgroup[])
532: {
533: PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;
535: PetscFunctionBegin;
536: PetscCheckTypeName(part, PETSCPARTITIONERMULTISTAGE);
538: if (levels) *levels = p->levels;
539: if (lgroup) *lgroup = p->lgroup;
540: PetscFunctionReturn(PETSC_SUCCESS);
541: }
/* Select the active stage and attach the DM to be used by the inner partitioner.
   Must be called after the stages have been configured (SetStages). */
PetscErrorCode PetscPartitionerMultistageSetStage_Multistage(PetscPartitioner part, PetscInt stage, PetscObject odm)
{
  DM dm = (DM)odm;
  PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;

  PetscFunctionBegin;
  PetscCheckTypeName(part, PETSCPARTITIONERMULTISTAGE);
  PetscCheck(p->levels, PetscObjectComm((PetscObject)part), PETSC_ERR_ORDER, "Number of stages not set yet");
  PetscCheck(stage >= 0 && stage < p->levels, PetscObjectComm((PetscObject)part), PETSC_ERR_ARG_OUTOFRANGE, "Invalid stage index %" PetscInt_FMT ", not in [0,%" PetscInt_FMT ")", stage, p->levels);

  p->stage = stage;
  /* reference the new DM before destroying the old one, so dm == p->stagedm is safe */
  PetscCall(PetscObjectReference((PetscObject)dm));
  PetscCall(DMDestroy(&p->stagedm));
  p->stagedm = dm;
  PetscFunctionReturn(PETSC_SUCCESS);
}
560: PetscErrorCode PetscPartitionerMultistageGetStage_Multistage(PetscPartitioner part, PetscInt *stage, PetscObject *odm)
561: {
562: PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;
564: PetscFunctionBegin;
565: PetscCheckTypeName(part, PETSCPARTITIONERMULTISTAGE);
566: if (stage) *stage = p->stage;
567: if (odm) *odm = (PetscObject)p->stagedm;
568: PetscFunctionReturn(PETSC_SUCCESS);
569: }
571: static PetscErrorCode PetscPartitionerSetUp_Multistage(PetscPartitioner part)
572: {
573: PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;
574: MPI_Comm comm = PetscObjectComm((PetscObject)part);
575: PetscInt nstages;
576: MPI_Group *groups;
578: PetscFunctionBegin;
579: if (p->levels) PetscFunctionReturn(PETSC_SUCCESS);
580: PetscCall(PetscPartitionerMultistage_CreateStages(comm, NULL, &nstages, &groups));
581: PetscCall(PetscPartitionerMultistageSetStages(part, nstages, groups));
582: PetscCall(PetscPartitionerMultistage_DestroyStages(nstages, &groups));
583: PetscFunctionReturn(PETSC_SUCCESS);
584: }
/* targetSection argument unused, target partition weights are computed internally */
/* Partition one stage of the hierarchy.
   The adjacency graph (start/adjacency CSR) is given on the full communicator; if the
   stage partitioner lives on a subcommunicator, the graph is first restricted/renumbered
   to that subcommunicator via the stage DM. The resulting partSection is expressed in
   the rank numbering of the full communicator. Collective on the partitioner comm. */
static PetscErrorCode PetscPartitionerPartition_Multistage(PetscPartitioner part, PetscInt nparts, PetscInt numVertices, PetscInt start[], PetscInt adjacency[], PetscSection vertSection, PetscSection edgeSection, PETSC_UNUSED PetscSection targetSection, PetscSection partSection, IS *partition)
{
  PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;
  PetscPartitioner ppart;
  PetscSection ppartSection = NULL;
  IS lpartition;
  const PetscInt *idxs;
  MPI_Comm comm, pcomm;
  MPI_Group group, lgroup, pgroup;
  PetscInt *pstart, *padjacency;
  PetscInt nid, i, pedgeCut;
  PetscMPIInt sameComm, pparts;
  PetscBool freeadj = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCheck(p->levels, PetscObjectComm((PetscObject)part), PETSC_ERR_ORDER, "Number of stages not set yet");
  PetscCheck(p->stage >= 0 && p->stage < p->levels, PetscObjectComm((PetscObject)part), PETSC_ERR_ARG_OUTOFRANGE, "Invalid stage index %" PetscInt_FMT ", not in [0,%" PetscInt_FMT ")", p->stage, p->levels);
  PetscCall(PetscLogEventBegin(PetscPartitioner_MS_Stage[PetscMin(p->stage, PETSCPARTITIONER_MS_MAXSTAGE)], part, 0, 0, 0));

  /* Group for current stage (size of the group defines number of "local" partitions) */
  lgroup = p->lgroup[p->stage];
  PetscCallMPI(MPI_Group_size(lgroup, &pparts));

  /* Current stage partitioner */
  ppart = p->ppart[p->stage];
  if (ppart) {
    PetscCall(PetscObjectGetComm((PetscObject)ppart, &pcomm));
    PetscCallMPI(MPI_Comm_group(pcomm, &pgroup));
  } else {
    /* this rank does not participate in the stage partitioner */
    pcomm = PETSC_COMM_SELF;
    pgroup = MPI_GROUP_EMPTY;
    pparts = -1;
  }

  /* Global comm of partitioner */
  PetscCall(PetscObjectGetComm((PetscObject)part, &comm));
  PetscCallMPI(MPI_Comm_group(comm, &group));

  /* Get adjacency */
  PetscCallMPI(MPI_Group_compare(group, pgroup, &sameComm));
  if (sameComm != MPI_UNEQUAL) {
    /* stage comm spans the same processes: use the input graph as is */
    pstart = start;
    padjacency = adjacency;
  } else { /* restrict to partitioner comm */
    ISLocalToGlobalMapping l2g, g2l;
    IS gid, rid;
    const PetscInt *idxs1;
    PetscInt *idxs2;
    PetscInt cStart, cEnd, cum;
    DM dm = p->stagedm;
    PetscSF sf;

    PetscCheck(dm, PetscObjectComm((PetscObject)part), PETSC_ERR_PLIB, "Missing stage DM");
    PetscCall(DMGetPointSF(dm, &sf));
    PetscCall(DMPlexGetHeightStratum(dm, part->height, &cStart, &cEnd));
    PetscCall(DMPlexCreateNumbering_Plex(dm, cStart, cEnd, part->height, NULL, sf, &gid));
    /* filter overlapped local cells (if any) */
    PetscCall(ISGetIndices(gid, &idxs1));
    PetscCall(ISGetLocalSize(gid, &cum));
    PetscCall(PetscMalloc1(cum, &idxs2));
    for (i = cStart, cum = 0; i < cEnd; i++) {
      if (idxs1[i - cStart] < 0) continue; /* negative numbering marks ghost (non-owned) cells */
      idxs2[cum++] = idxs1[i - cStart];
    }
    PetscCall(ISRestoreIndices(gid, &idxs1));

    PetscCheck(numVertices == cum, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected %" PetscInt_FMT " != %" PetscInt_FMT, numVertices, cum);
    PetscCall(ISDestroy(&gid));

    /*
       g2l from full numbering to local numbering
       l2g from local numbering to restricted numbering
    */
    PetscCall(ISCreateGeneral(pcomm, numVertices, idxs2, PETSC_OWN_POINTER, &gid));
    PetscCall(ISRenumber(gid, NULL, NULL, &rid));
    PetscCall(ISLocalToGlobalMappingCreateIS(gid, &g2l));
    PetscCall(ISLocalToGlobalMappingSetType(g2l, ISLOCALTOGLOBALMAPPINGHASH));
    PetscCall(ISLocalToGlobalMappingCreateIS(rid, &l2g));
    PetscCall(ISDestroy(&gid));
    PetscCall(ISDestroy(&rid));

    /* rebuild the CSR graph in the restricted numbering, dropping edges to
       vertices outside the stage subcommunicator */
    PetscCall(PetscMalloc2(numVertices + 1, &pstart, start[numVertices], &padjacency));
    pstart[0] = 0;
    for (i = 0; i < numVertices; i++) {
      PetscCall(ISGlobalToLocalMappingApply(g2l, IS_GTOLM_DROP, start[i + 1] - start[i], adjacency + start[i], &pstart[i + 1], padjacency + pstart[i]));
      PetscCall(ISLocalToGlobalMappingApply(l2g, pstart[i + 1], padjacency + pstart[i], padjacency + pstart[i]));
      pstart[i + 1] += pstart[i];
    }
    PetscCall(ISLocalToGlobalMappingDestroy(&l2g));
    PetscCall(ISLocalToGlobalMappingDestroy(&g2l));

    freeadj = PETSC_TRUE;
  }

  /* Compute partitioning */
  pedgeCut = 0;
  PetscCall(PetscSectionCreate(pcomm, &ppartSection));
  if (ppart) {
    PetscMPIInt prank;

    PetscCheck(ppart->ops->partition, PetscObjectComm((PetscObject)ppart), PETSC_ERR_ARG_WRONGSTATE, "PetscPartitioner has no partitioning method on stage %" PetscInt_FMT, p->stage);
    PetscCall(PetscPartitionerPartition(ppart, pparts, numVertices, pstart, padjacency, vertSection, edgeSection, p->tpwgs[p->stage], ppartSection, &lpartition));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)ppart), &prank));
    if (!prank) pedgeCut = ppart->edgeCut; /* only the master rank will reduce */
  } else { /* not participating */
    PetscCall(ISCreateStride(PETSC_COMM_SELF, numVertices, 0, 1, &lpartition));
    pparts = numVertices > 0 ? 1 : 0; /* keep all local vertices on this rank */
  }
  if (freeadj) PetscCall(PetscFree2(pstart, padjacency));

  /* Init final partition (output) */
  PetscCall(PetscSectionReset(partSection));
  PetscCall(PetscSectionSetChart(partSection, 0, nparts));

  /* We need to map the section back to the global comm numbering */
  for (i = 0; i < pparts; i++) {
    PetscInt dof;
    PetscMPIInt mp, mpt;

    if (lgroup != MPI_GROUP_EMPTY) {
      PetscCall(PetscMPIIntCast(i, &mp));
      PetscCall(PetscSectionGetDof(ppartSection, i, &dof));
      PetscCallMPI(MPI_Group_translate_ranks(lgroup, 1, &mp, group, &mpt));
    } else {
      /* non-participating rank: everything stays on this rank */
      PetscCheck(pparts == 1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Unexpected pparts %d", pparts);
      PetscCallMPI(MPI_Comm_rank(comm, &mpt));
      PetscCall(ISGetLocalSize(lpartition, &dof));
    }
    PetscCall(PetscSectionSetDof(partSection, mpt, dof));
  }
  PetscCall(PetscSectionSetUp(partSection));
  PetscCall(PetscSectionDestroy(&ppartSection));

  /* No need to translate the "partition" output, as it is in local cell numbering
     we only change the comm of the index set */
  PetscCall(ISGetIndices(lpartition, &idxs));
  PetscCall(ISGetLocalSize(lpartition, &nid));
  PetscCall(ISCreateGeneral(comm, nid, idxs, PETSC_COPY_VALUES, partition));
  PetscCall(ISRestoreIndices(lpartition, &idxs));
  PetscCall(ISDestroy(&lpartition));

  PetscCall(PetscSectionViewFromOptions(partSection, (PetscObject)part, "-petscpartitioner_multistage_partition_view"));
  PetscCall(ISViewFromOptions(*partition, (PetscObject)part, "-petscpartitioner_multistage_partition_view"));

  /* Update edge-cut */
  p->edgeCut[p->stage] = pedgeCut;
  for (i = p->stage + 1; i < p->levels; i++) p->edgeCut[i] = 0; /* later stages invalidated */
  for (i = 0; i < p->stage; i++) pedgeCut += p->edgeCut[i];     /* accumulate earlier stages */
  part->edgeCut = -1;
  PetscCallMPI(MPI_Reduce(&pedgeCut, &part->edgeCut, 1, MPIU_INT, MPI_SUM, 0, comm));

  PetscCallMPI(MPI_Group_free(&group));
  if (pgroup != MPI_GROUP_EMPTY) PetscCallMPI(MPI_Group_free(&pgroup));
  PetscCall(PetscLogEventEnd(PetscPartitioner_MS_Stage[PetscMin(p->stage, PETSCPARTITIONER_MS_MAXSTAGE)], part, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
744: static PetscErrorCode PetscPartitionerSetFromOptions_Multistage(PetscPartitioner part, PetscOptionItems PetscOptionsObject)
745: {
746: PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;
747: PetscEnum strategy = (PetscEnum)PETSCPARTITIONER_MS_STRATEGY_NODE;
748: PetscBool set, roundrobin;
749: PetscInt options[3] = {PETSC_DETERMINE, PETSC_DETERMINE, PETSC_DETERMINE};
751: PetscFunctionBegin;
752: PetscOptionsHeadBegin(PetscOptionsObject, "PetscPartitioner Multistate Options");
753: PetscCall(PetscOptionsEnum("-petscpartitioner_multistage_strategy", "Default partitioning strategy", "", PetscPartitionerMultistageStrategyList, strategy, &strategy, &set));
754: if (set || !p->levels) {
755: options[0] = (PetscInt)strategy;
756: switch (options[0]) {
757: case PETSCPARTITIONER_MS_STRATEGY_NODE:
758: options[1] = PETSC_DETERMINE;
759: roundrobin = PETSC_FALSE;
760: PetscCall(PetscOptionsInt("-petscpartitioner_multistage_node_size", "Number of processes per node", "", options[1], &options[1], NULL));
761: PetscCall(PetscOptionsBool("-petscpartitioner_multistage_node_interleaved", "Use round robin rank assignments", "", roundrobin, &roundrobin, NULL));
762: options[2] = (PetscInt)roundrobin;
763: break;
764: case PETSCPARTITIONER_MS_STRATEGY_MSECTION:
765: options[1] = 2;
766: PetscCall(PetscOptionsInt("-petscpartitioner_multistage_msection", "Number of splits per level", "", options[1], &options[1], NULL));
767: break;
768: default:
769: break; /* LCOV_EXCL_LINE */
770: }
771: }
772: PetscCall(PetscOptionsBool("-petscpartitioner_multistage_tpwgts", "Use target partition weights in stage partitioners", "", p->compute_tpwgs, &p->compute_tpwgs, NULL));
773: PetscCall(PetscOptionsBool("-petscpartitioner_multistage_viewtpwgts", "View target partition weights", "", p->view_tpwgs, &p->view_tpwgs, NULL));
774: PetscOptionsHeadEnd();
776: if (options[0] != PETSC_DETERMINE) {
777: MPI_Comm comm = PetscObjectComm((PetscObject)part);
778: PetscInt nstages;
779: MPI_Group *groups;
781: PetscCall(PetscPartitionerMultistage_CreateStages(comm, options, &nstages, &groups));
782: PetscCall(PetscPartitionerMultistageSetStages(part, nstages, groups));
783: PetscCall(PetscPartitionerMultistage_DestroyStages(nstages, &groups));
784: }
786: {
787: PetscInt nstages = p->levels, l;
788: for (l = 0; l < nstages; l++) {
789: if (p->ppart[l]) PetscCall(PetscPartitionerSetFromOptions(p->ppart[l]));
790: }
791: }
792: PetscFunctionReturn(PETSC_SUCCESS);
793: }
795: /*MC
796: PETSCPARTITIONERMULTISTAGE = "multistage" - A PetscPartitioner object using a multistage distribution strategy
798: Level: intermediate
800: Options Database Keys:
801: + -petscpartitioner_multistage_strategy <strategy> - Either `PETSCPARTITIONER_MS_STRATEGY_NODE`, or `PETSCPARTITIONER_MS_STRATEGY_MSECTION`
802: . -petscpartitioner_multistage_node_size <int> - Number of processes per computing node (or `PETSC_DECIDE`)
803: . -petscpartitioner_multistage_node_interleaved <bool> - Assign ranks round-robin.
804: . -petscpartitioner_multistage_msection <int> - Number of splits per level in recursive m-section splits (use `2` for bisection)
805: - -petscpartitioner_multistage_tpwgts <bool> - Use target partition weights in stage partitioner
807: Notes:
  The default multistage strategy uses `PETSCPARTITIONER_MS_STRATEGY_NODE` and automatically discovers node information using `MPI_Comm_split_type`.
809: `PETSCPARTITIONER_MS_STRATEGY_MSECTION` is more for testing purposes.
810: Options for single stage partitioners are prefixed by `-petscpartitioner_multistage_levels_`.
811: For example, to use parmetis in all stages, `-petscpartitioner_multistage_levels_petscpartitioner_type parmetis`
812: Finer grained control is also possible: `-petscpartitioner_multistage_levels_0_petscpartitioner_type parmetis`, `-petscpartitioner_multistage_levels_1_petscpartitioner_type simple`
814: .seealso: `PetscPartitionerType`, `PetscPartitionerCreate()`, `PetscPartitionerSetType()`
815: M*/
817: PETSC_EXTERN PetscErrorCode PetscPartitionerCreate_Multistage(PetscPartitioner part)
818: {
819: PetscPartitioner_MS *p;
821: PetscFunctionBegin;
823: PetscCall(PetscNew(&p));
824: p->compute_tpwgs = PETSC_TRUE;
826: part->data = p;
827: part->ops->view = PetscPartitionerView_Multistage;
828: part->ops->destroy = PetscPartitionerDestroy_Multistage;
829: part->ops->partition = PetscPartitionerPartition_Multistage;
830: part->ops->setfromoptions = PetscPartitionerSetFromOptions_Multistage;
831: part->ops->setup = PetscPartitionerSetUp_Multistage;
833: PetscCall(PetscCitationsRegister(MSPartitionerCitation, &MSPartitionerCite));
834: PetscFunctionReturn(PETSC_SUCCESS);
835: }