/* mspart.c — multistage graph partitioner implementation */

  1: #include <petscsf.h>
  2: #include <petscdmplex.h>
  3: #include <petsc/private/dmimpl.h>
  4: #include <petsc/private/dmpleximpl.h>
  5: #include <petsc/private/partitionerimpl.h>

/* Citation registration flag and BibTeX entry for the multistage partitioner paper */
PetscBool  MSPartitionerCite       = PETSC_FALSE;
const char MSPartitionerCitation[] = "@article{PETScMSPart2021,\n"
                                     "  author  = {Parsani, Matteo and Boukharfane, Radouan and Nolasco, Irving Reyna and Fern{\'a}ndez, David C Del Rey and Zampini, Stefano and Hadri, Bilel and Dalcin, Lisandro},\n"
                                     "  title   = {High-order accurate entropy-stable discontinuous collocated Galerkin methods with the summation-by-parts property for compressible {CFD} frameworks: Scalable {SSDC} algorithms and flow solver},\n"
                                     "  journal = {Journal of Computational Physics},\n"
                                     "  volume  = {424},\n"
                                     "  pages   = {109844},\n"
                                     "  year    = {2021}\n"
                                     "}\n";

/* Logging events: one for stage setup, one per partitioning stage (capped at PETSCPARTITIONER_MS_NUMSTAGE) */
PetscLogEvent PetscPartitioner_MS_SetUp;
PetscLogEvent PetscPartitioner_MS_Stage[PETSCPARTITIONER_MS_NUMSTAGE];

/* Private data for the multistage partitioner: a hierarchy of per-stage
   partitioners, each acting on an MPI group of the original communicator */
typedef struct {
  PetscInt   levels; /* number of stages in the hierarchy */
  MPI_Group *lgroup; /* per-stage process groups, in the rank numbering of the partitioner's communicator */
  /* Need access to the DM in inner stages */
  PetscInt stage;   /* stage currently being partitioned */
  DM       stagedm; /* DM for the current stage (holds a reference, set via SetStage) */
  /* Stage partitioners */
  PetscPartitioner *ppart; /* one inner partitioner per stage (NULL on ranks not participating in that stage) */
  /* Diagnostic */
  PetscInt *edgeCut; /* per-stage edge cut, accumulated for reporting */
  /* Target partition weights */
  PetscBool     view_tpwgs;    /* if true, also view the weight sections in PetscPartitionerView */
  PetscSection *tpwgs;         /* per-stage target partition weights (computed at setup) */
  PetscBool     compute_tpwgs; /* whether to compute target partition weights during SetStages */
} PetscPartitioner_MS;

/* Strategy names for options processing; the two trailing strings are the enum type name and option prefix */
const char *const PetscPartitionerMultistageStrategyList[] = {"NODE", "MSECTION", "PetscPartitionerMultistageStrategy", "PETSCPARTITIONER_MS_", NULL};

 38: static void PetscLCM_Local(void *in, void *out, PetscMPIInt *cnt, MPI_Datatype *datatype)
 39: {
 40:   PetscInt  count = *cnt;
 41:   PetscInt *xin = (PetscInt *)in, *xout = (PetscInt *)out;

 43:   PetscFunctionBegin;
 44:   if (*datatype != MPIU_INT) {
 45:     (void)(*PetscErrorPrintf)("Can only handle MPIU_INT");
 46:     PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
 47:   }
 48:   for (PetscInt i = 0; i < count; i++) xout[i] = PetscLCM(xin[i], xout[i]);
 49:   PetscFunctionReturnVoid();
 50: }

/*
  ASCII viewer for the multistage partitioner.

  Prints the total number of stages and a summary line per stage; with
  format PETSC_VIEWER_ASCII_INFO_DETAIL it additionally views the inner
  stage partitioners, the global ranks participating in each stage group,
  and (if view_tpwgs) the target partition weight sections.  When a stage
  DM is set, output is restricted to the current stage.
*/
static PetscErrorCode PetscPartitionerView_Multistage(PetscPartitioner part, PetscViewer viewer)
{
  PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;
  PetscViewerFormat    format;
  PetscBool            iascii;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (iascii) {
    MPI_Comm  comm;
    MPI_Group group;

    PetscCall(PetscViewerASCIIPushTab(viewer));
    if (!p->stagedm || p->stage == 0) PetscCall(PetscViewerASCIIPrintf(viewer, "Multistage graph partitioner: total stages %" PetscInt_FMT "\n", p->levels));
    comm = PetscObjectComm((PetscObject)part);
    PetscCallMPI(MPI_Comm_group(comm, &group));
    for (PetscInt l = 0; l < p->levels; l++) {
      PetscPartitioner ppart  = p->ppart[l];
      MPI_Comm         pcomm  = PetscObjectComm((PetscObject)ppart);
      MPI_Group        pgroup = MPI_GROUP_EMPTY;

      PetscCall(PetscViewerASCIIPushTab(viewer));
      if (l) {
        /* inner stages: derive the group from the stage partitioner's communicator (empty when not participating) */
        if (pcomm != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_group(pcomm, &pgroup));
      } else {
        /* first stage: the collectively specified group is stored in lgroup[0] (do not free it below) */
        pgroup = p->lgroup[0];
      }

      if (l) {
        IS          is;
        PetscMPIInt gr, lem, gem = 0;
        PetscInt    uniq;

        /* count distinct stage groups: each process contributes the global rank
           of its group's rank 0, then ISRenumber counts unique values;
           lem flags processes with no group at this stage */
        lem = 1;
        PetscCallMPI(MPI_Group_size(group, &gr));
        if (pgroup != MPI_GROUP_EMPTY) {
          lem = 0;
          PetscCallMPI(MPI_Group_translate_ranks(pgroup, 1, &lem, group, &gr));
        }
        PetscCall(ISCreateStride(PetscObjectComm((PetscObject)part), 1, gr, 1, &is));
        PetscCall(ISRenumber(is, NULL, &uniq, NULL));
        PetscCall(ISDestroy(&is));
        PetscCallMPI(MPIU_Allreduce(&lem, &gem, 1, MPI_INT, MPI_SUM, PetscObjectComm((PetscObject)part)));
        if (gem) uniq--; /* empty processes all mapped to the same extra id; discount it */
        if (!p->stagedm || l == p->stage) PetscCall(PetscViewerASCIIPrintf(viewer, "Stage %" PetscInt_FMT " partitioners (%" PetscInt_FMT " unique groups, %d empty processes)\n", l, uniq, gem));
      } else {
        PetscMPIInt psize;
        PetscCallMPI(MPI_Group_size(pgroup, &psize));
        if (!p->stagedm || l == p->stage) PetscCall(PetscViewerASCIIPrintf(viewer, "Stage %" PetscInt_FMT " partitioner to %d processes\n", l, psize));
      }
      PetscCall(PetscViewerFlush(viewer));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL && (!p->stagedm || l == p->stage)) {
        PetscViewer pviewer;

        if (pcomm == MPI_COMM_NULL) pcomm = PETSC_COMM_SELF;
        PetscCall(PetscViewerGetSubViewer(viewer, pcomm, &pviewer));
        if (ppart) {
          PetscMPIInt size, *ranks, *granks;
          char        tstr[16], strranks[3072]; /* I'm lazy: max 12 chars (including spaces) per rank -> 256 ranks max*/

          /* translate stage-local ranks to global ranks for display */
          PetscCallMPI(MPI_Group_size(pgroup, &size));
          PetscCall(PetscMalloc2(size, &ranks, size, &granks));
          for (PetscInt i = 0; i < size; i++) ranks[i] = i;
          PetscCallMPI(MPI_Group_translate_ranks(pgroup, size, ranks, group, granks));
          if (size <= 256) {
            PetscCall(PetscStrncpy(strranks, "", sizeof(strranks)));
            for (PetscInt i = 0; i < size; i++) {
              PetscCall(PetscSNPrintf(tstr, sizeof(tstr), " %d", granks[i]));
              PetscCall(PetscStrlcat(strranks, tstr, sizeof(strranks)));
            }
          } else {
            PetscCall(PetscStrncpy(strranks, " not showing > 256", sizeof(strranks))); /* LCOV_EXCL_LINE */
          }
          PetscCall(PetscFree2(ranks, granks));
          if (!l) {
            PetscCall(PetscViewerASCIIPrintf(pviewer, "Destination ranks:%s\n", strranks));
            PetscCall(PetscPartitionerView(ppart, pviewer));
          } else {
            PetscCall(PetscPartitionerView(ppart, pviewer));
            PetscCall(PetscViewerASCIIPrintf(pviewer, "  Participating ranks:%s\n", strranks));
          }
          if (p->view_tpwgs) PetscCall(PetscSectionView(p->tpwgs[l], pviewer));
        }
        PetscCall(PetscViewerRestoreSubViewer(viewer, pcomm, &pviewer));
      }
      PetscCall(PetscViewerFlush(viewer));

      /* groups obtained via MPI_Comm_group above must be freed; lgroup[0] is owned by the partitioner */
      if (l && pgroup != MPI_GROUP_EMPTY) PetscCallMPI(MPI_Group_free(&pgroup));
    }
    for (PetscInt l = 0; l < p->levels; l++) PetscCall(PetscViewerASCIIPopTab(viewer));
    PetscCall(PetscViewerASCIIPopTab(viewer));
    PetscCallMPI(MPI_Group_free(&group));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Build the default stage hierarchy for a communicator.

  options (may be NULL): options[0] selects the strategy:
  - PETSCPARTITIONER_MS_STRATEGY_NODE: two stages (internode, then intranode).
    options[1] > 0 fixes the node size and options[2] selects interleaved
    grouping; otherwise node boundaries come from MPI_Comm_split_type
    (shared memory) when available, else the whole comm is one node.
  - PETSCPARTITIONER_MS_STRATEGY_MSECTION: recursive m-section with
    m = options[1] (bisection when options is NULL).

  Output: *nlevels stages and *levels, a malloc'ed array of MPI groups in
  the rank numbering of comm; release with PetscPartitionerMultistage_DestroyStages().
*/
static PetscErrorCode PetscPartitionerMultistage_CreateStages(MPI_Comm comm, const PetscInt *options, PetscInt *nlevels, MPI_Group *levels[])
{
  MPI_Comm     ncomm;
  MPI_Group   *lgroup;
  MPI_Group    ggroup, ngroup;
  PetscMPIInt *ranks, *granks;
  PetscMPIInt  size, nsize, isize, rank, nrank, i, l, n, m;
  PetscInt     strategy = options ? options[0] : (PetscInt)PETSCPARTITIONER_MS_STRATEGY_NODE;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* uniprocessor case: a single stage containing the whole (one-process) group */
  if (size == 1) {
    PetscCall(PetscMalloc1(1, &lgroup));
    PetscCallMPI(MPI_Comm_group(comm, &lgroup[0]));
    *nlevels = 1;
    *levels  = lgroup;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  switch (strategy) {
  case PETSCPARTITIONER_MS_STRATEGY_NODE:
    /* create groups (in global rank ordering of comm) for the 2-level partitioner */
    if (options && options[1] > 0) {
      PetscMPIInt node_size = (PetscMPIInt)options[1];
      if (node_size > 1) {
        PetscMPIInt color;
        if (options[2]) { /* interleaved */
          PetscMPIInt ngroups = size / node_size;

          if (size % node_size) ngroups += 1;
          color = rank % ngroups;
        } else {
          color = rank / node_size;
        }
        PetscCallMPI(MPI_Comm_split(comm, color, rank, &ncomm));
      } else {
        /* node size 1: every process is its own node; intranode comm degenerates to comm */
        PetscCallMPI(MPI_Comm_dup(comm, &ncomm));
      }
    } else {
#if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
      PetscCallMPI(MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, rank, MPI_INFO_NULL, &ncomm));
#else
      /* if users do not specify the node size and MPI_Comm_split_type is not available, defaults to same comm */
      PetscCallMPI(MPI_Comm_dup(comm, &ncomm));
#endif
    }

    PetscCallMPI(MPI_Comm_size(ncomm, &nsize));
    if (nsize == size) { /* one node */
      PetscCall(PetscMalloc1(1, &lgroup));
      PetscCallMPI(MPI_Comm_group(ncomm, &lgroup[0]));
      PetscCallMPI(MPI_Comm_free(&ncomm));

      *nlevels = 1;
      *levels  = lgroup;
      break;
    }

    PetscCall(PetscMalloc1(2, &lgroup));

    /* intranode group (in terms of global rank) */
    PetscCall(PetscMalloc2(size, &ranks, nsize, &granks));
    for (i = 0; i < nsize; i++) ranks[i] = i;
    PetscCallMPI(MPI_Comm_group(comm, &ggroup));
    PetscCallMPI(MPI_Comm_group(ncomm, &ngroup));
    PetscCallMPI(MPI_Group_translate_ranks(ngroup, nsize, ranks, ggroup, granks));
    PetscCallMPI(MPI_Group_incl(ggroup, nsize, granks, &lgroup[1]));

    /* internode group (master processes on the nodes only)
       this group must be specified by all processes in the comm
       we need to gather those master ranks */
    PetscCallMPI(MPI_Group_rank(ngroup, &nrank));
    granks[0] = !nrank ? rank : -1; /* only node masters (local rank 0) contribute their global rank */
    PetscCallMPI(MPI_Allgather(granks, 1, MPI_INT, ranks, 1, MPI_INT, comm));
    for (i = 0, isize = 0; i < size; i++)
      if (ranks[i] >= 0) ranks[isize++] = ranks[i]; /* compact the list of master ranks */
    PetscCallMPI(MPI_Group_incl(ggroup, isize, ranks, &lgroup[0]));

    PetscCall(PetscFree2(ranks, granks));
    PetscCallMPI(MPI_Group_free(&ggroup));
    PetscCallMPI(MPI_Group_free(&ngroup));
    PetscCallMPI(MPI_Comm_free(&ncomm));

    *nlevels = 2;
    *levels  = lgroup;
    break;

  case PETSCPARTITIONER_MS_STRATEGY_MSECTION:
    /* recursive m-section (m=2 bisection) */
    m = options ? (PetscMPIInt)options[1] : 2;
    PetscCheck(m >= 2, comm, PETSC_ERR_SUP, "Invalid split %d, must be greater than one", m);
    /* find largest l, n = m^l with n <= size */
    l = 0;
    n = 1;
    while (n <= size) {
      l++;
      n *= m;
    }
    l -= 1;
    n /= m;

    PetscCheck(l != 0, comm, PETSC_ERR_SUP, "Invalid split %d with communicator size %d", m, size);
    PetscCall(PetscMalloc1(l, &lgroup));
    for (i = 1; i < l; i++) lgroup[i] = MPI_GROUP_EMPTY;

    if (l > 1) {
      PetscMPIInt rem = size - n; /* ranks beyond the largest power m^l participate only at level 0 */
      PetscCall(PetscMalloc1((m + rem), &ranks));
      for (i = 0; i < m; i++) ranks[i] = i * (n / m);
      for (i = 0; i < rem; i++) ranks[i + m] = n + i;
      PetscCallMPI(MPI_Comm_group(comm, &ggroup));
      PetscCallMPI(MPI_Group_incl(ggroup, m + rem, ranks, &lgroup[0]));
      if (rank < n) {
        /* at level i, ranks that are multiples of m^(l-i-1) form groups of m around their anchor */
        for (i = 1; i < l; i++) {
          PetscMPIInt inc = (PetscMPIInt)PetscPowInt(m, l - i - 1);
          if (rank % inc == 0) {
            PetscMPIInt anch = (rank - rank % (inc * m)), r;
            for (r = 0; r < m; r++) ranks[r] = anch + r * inc;
            PetscCallMPI(MPI_Group_incl(ggroup, m, ranks, &lgroup[i]));
          }
        }
      }
      PetscCall(PetscFree(ranks));
      PetscCallMPI(MPI_Group_free(&ggroup));
    } else {
      PetscCallMPI(MPI_Comm_group(comm, &lgroup[0]));
    }

    *nlevels = l;
    *levels  = lgroup;
    break;

  default:
    SETERRQ(comm, PETSC_ERR_SUP, "Not implemented"); /* LCOV_EXCL_LINE */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

288: static PetscErrorCode PetscPartitionerMultistage_DestroyStages(PetscInt nstages, MPI_Group *groups[])
289: {
290:   PetscFunctionBegin;
291:   for (PetscInt l = 0; l < nstages; l++) {
292:     MPI_Group group = (*groups)[l];
293:     if (group != MPI_GROUP_EMPTY) PetscCallMPI(MPI_Group_free(&group));
294:   }
295:   PetscCall(PetscFree(*groups));
296:   PetscFunctionReturn(PETSC_SUCCESS);
297: }

299: static PetscErrorCode PetscPartitionerReset_Multistage(PetscPartitioner part)
300: {
301:   PetscPartitioner_MS *p       = (PetscPartitioner_MS *)part->data;
302:   PetscInt             nstages = p->levels;

304:   PetscFunctionBegin;
305:   p->levels = 0;
306:   PetscCall(PetscPartitionerMultistage_DestroyStages(nstages, &p->lgroup));
307:   for (PetscInt l = 0; l < nstages; l++) {
308:     PetscCall(PetscPartitionerDestroy(&p->ppart[l]));
309:     PetscCall(PetscSectionDestroy(&p->tpwgs[l]));
310:   }
311:   PetscCall(PetscFree(p->ppart));
312:   PetscCall(PetscFree(p->tpwgs));
313:   PetscCall(PetscFree(p->edgeCut));
314:   PetscFunctionReturn(PETSC_SUCCESS);
315: }

317: static PetscErrorCode PetscPartitionerDestroy_Multistage(PetscPartitioner part)
318: {
319:   PetscFunctionBegin;
320:   PetscCall(PetscPartitionerReset_Multistage(part));
321:   PetscCall(PetscFree(part->data));
322:   PetscFunctionReturn(PETSC_SUCCESS);
323: }

325: /*@
326:   PetscPartitionerMultistageSetStages - Sets stages for the partitioning

328:   Collective

330:   Input Parameters:
331: + part   - the `PetscPartitioner` object.
332: . levels - the number of stages.
333: - lgroup - array of `MPI_Group`s for each stage.

335:   Level: advanced

337:   Notes:
338:   `MPI_Comm_create(comm, lgroup[l], &lcomm)` is used to compute the communicator for the stage partitioner at level `l`.
339:   The groups must be specified in the process numbering of the partitioner communicator.
340:   `lgroup[0]` must be collectively specified and it must represent a proper subset of the communicator associated with the original partitioner.
341:   For each level, ranks can be listed in one group only (but they can be listed on different levels)

343: .seealso: `PetscPartitionerSetType()`, `PetscPartitionerDestroy()`, `PETSCPARTITIONERMULTISTAGE`
344: @*/
PetscErrorCode PetscPartitionerMultistageSetStages(PetscPartitioner part, PetscInt levels, MPI_Group lgroup[])
{
  PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;
  MPI_Comm             comm;
  MPI_Group            wgroup; /* group of the partitioner's communicator */
  PetscMPIInt        **lparts = NULL; /* per-level: number of partitions as seen by each rank (only when computing weights) */
  PetscMPIInt          rank, size;
  PetscBool            touched, isms;

  PetscFunctionBegin;
  PetscCheck(levels >= 0, PetscObjectComm((PetscObject)part), PETSC_ERR_ARG_OUTOFRANGE, "Number of levels must be non-negative");
  if (levels) PetscAssertPointer(lgroup, 3);
  PetscCall(PetscObjectTypeCompare((PetscObject)part, PETSCPARTITIONERMULTISTAGE, &isms));
  if (!isms) PetscFunctionReturn(PETSC_SUCCESS); /* silently ignore other partitioner types */
  PetscCall(PetscLogEventBegin(PetscPartitioner_MS_SetUp, part, 0, 0, 0));

  /* drop any previously configured stage hierarchy */
  PetscCall(PetscPartitionerReset_Multistage(part));

  PetscCall(PetscObjectGetComm((PetscObject)part, &comm));
  PetscCallMPI(MPI_Comm_group(comm, &wgroup));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  p->levels = levels;
  PetscCall(PetscCalloc1(p->levels, &p->ppart));
  PetscCall(PetscCalloc1(p->levels, &p->lgroup));
  PetscCall(PetscCalloc1(p->levels, &p->tpwgs));
  PetscCall(PetscCalloc1(p->levels, &p->edgeCut));

  /* support for target partition weights */
  touched = PETSC_FALSE; /* becomes true once this rank participates in some level */
  if (p->compute_tpwgs) PetscCall(PetscMalloc1(p->levels, &lparts));

  for (PetscInt l = 0; l < p->levels; l++) {
    const char *prefix;
    char        aprefix[256];
    MPI_Comm    lcomm;

    if (l) { /* let MPI complain/hang if the user did not specify the groups properly */
      PetscCallMPI(MPI_Comm_create(comm, lgroup[l], &lcomm));
    } else { /* in debug mode, we check that the initial group must be consistently (collectively) specified on comm */
#if defined(PETSC_USE_DEBUG)
      MPI_Group    group, igroup = lgroup[0];
      PetscMPIInt *ranks, *granks;
      PetscMPIInt  b[2], b2[2], csize, gsize;

      /* first check: all ranks must agree on the group size (max of -size and +size must match) */
      PetscCallMPI(MPI_Group_size(igroup, &gsize));
      b[0] = -gsize;
      b[1] = +gsize;
      PetscCallMPI(MPIU_Allreduce(b, b2, 2, MPI_INT, MPI_MAX, comm));
      PetscCheck(-b2[0] == b2[1], comm, PETSC_ERR_ARG_WRONG, "Initial group must be collectively specified");
      /* second check: all ranks must list the same global ranks, in the same order */
      PetscCallMPI(MPI_Comm_group(comm, &group));
      PetscCallMPI(MPI_Group_size(group, &csize));
      PetscCall(PetscMalloc2(gsize, &ranks, (csize * gsize), &granks));
      for (PetscMPIInt i = 0; i < gsize; i++) granks[i] = i;
      PetscCallMPI(MPI_Group_translate_ranks(igroup, gsize, granks, group, ranks));
      PetscCallMPI(MPI_Group_free(&group));
      PetscCallMPI(MPI_Allgather(ranks, gsize, MPI_INT, granks, gsize, MPI_INT, comm));
      for (PetscMPIInt i = 0; i < gsize; i++) {
        PetscMPIInt chkr = granks[i];
        for (PetscMPIInt j = 0; j < csize; j++) {
          PetscMPIInt shft = j * gsize + i;
          PetscCheck(chkr == granks[shft], comm, PETSC_ERR_ARG_WRONG, "Initial group must be collectively specified");
        }
      }
      PetscCall(PetscFree2(ranks, granks));
#endif
      lcomm = comm; /* level 0 always runs on the full communicator */
    }
    PetscCall(PetscObjectGetOptionsPrefix((PetscObject)part, &prefix));
    if (lcomm != MPI_COMM_NULL) {
      /* MPI_Group_dup */
      PetscCallMPI(MPI_Group_union(lgroup[l], MPI_GROUP_EMPTY, &p->lgroup[l]));
      PetscCall(PetscPartitionerCreate(lcomm, &p->ppart[l]));
      /* inner partitioners are customizable via -<prefix>petscpartitioner_multistage_levels_<l>_... */
      PetscCall(PetscObjectSetOptionsPrefix((PetscObject)p->ppart[l], prefix));
      PetscCall(PetscSNPrintf(aprefix, sizeof(aprefix), "petscpartitioner_multistage_levels_%" PetscInt_FMT "_", l));
      PetscCall(PetscObjectAppendOptionsPrefix((PetscObject)p->ppart[l], aprefix));
    } else {
      PetscCheck(l != 0, PetscObjectComm((PetscObject)part), PETSC_ERR_USER, "Invalid first group");
      p->lgroup[l] = MPI_GROUP_EMPTY; /* this rank does not participate at this level */
    }
    if (lcomm != comm && lcomm != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&lcomm));

    /* compute number of partitions per level and detect if a process is part of the process (at any level) */
    if (p->compute_tpwgs) {
      PetscMPIInt gsize;
      PetscCall(PetscMalloc1(size, &lparts[l]));
      PetscCallMPI(MPI_Group_size(p->lgroup[l], &gsize));
      if (!l) {
        /* at level 0 the group is known everywhere; only ranks actually in the group count it */
        PetscMPIInt tr;
        PetscCallMPI(MPI_Group_translate_ranks(wgroup, 1, &rank, p->lgroup[0], &tr));
        if (tr == MPI_UNDEFINED) gsize = 0;
      }
      if (touched && !gsize) gsize = 1; /* a rank already involved earlier still carries one partition forward */
      PetscCallMPI(MPI_Allgather(&gsize, 1, MPI_INT, lparts[l], 1, MPI_INT, comm));
      if (lparts[l][rank]) touched = PETSC_TRUE;
    }
  }

  /* determine weights (bottom-up) */
  if (p->compute_tpwgs) {
    PetscMPIInt *tranks;
    PetscInt    *lwgts, wgt;
    MPI_Op       MPIU_LCM; /* XXX this is actually recreated at every setup */

    PetscCall(PetscMalloc1((2 * size), &tranks));

    /* we need to compute the least common multiple across processes */
    PetscCallMPI(MPI_Op_create(PetscLCM_Local, 1, &MPIU_LCM));

    /* final target has to have all ones as weights (if the process gets touched) */
    wgt = touched ? 1 : 0;
    PetscCall(PetscMalloc1(size, &lwgts));
    PetscCallMPI(MPI_Allgather(&wgt, 1, MPIU_INT, lwgts, 1, MPIU_INT, comm));

    /* now go up the hierarchy and populate the PetscSection describing the partition weights */
    for (PetscInt l = p->levels - 1; l >= 0; l--) {
      MPI_Comm    pcomm;
      MPI_Group   igroup;
      PetscMPIInt isize, isizer = 0;
      PetscInt    a, b, wgtr;
      PetscMPIInt gsize;
      PetscBool   usep = PETSC_FALSE;

      if (p->ppart[l]) {
        PetscCall(PetscObjectGetComm((PetscObject)p->ppart[l], &pcomm));
      } else {
        pcomm = PETSC_COMM_SELF;
      }

      PetscCallMPI(MPI_Group_size(p->lgroup[l], &gsize));
      if (gsize) {
        /* participating rank: translate the level group to global ranks to index lwgts */
        usep = PETSC_TRUE;
        for (PetscMPIInt i = 0; i < gsize; i++) tranks[i] = i;
        PetscCallMPI(MPI_Group_translate_ranks(p->lgroup[l], gsize, tranks, wgroup, tranks + size));
      } else gsize = lparts[l][rank];
      PetscCall(PetscFree(lparts[l]));
      PetscCall(PetscSectionCreate(pcomm, &p->tpwgs[l]));
      PetscCall(PetscObjectSetName((PetscObject)p->tpwgs[l], "Target partition weights"));
      PetscCall(PetscSectionSetChart(p->tpwgs[l], 0, gsize));

      if (usep) {
        PetscMPIInt *tr = tranks + size;
        for (PetscMPIInt i = 0; i < gsize; i++) PetscCall(PetscSectionSetDof(p->tpwgs[l], i, lwgts[tr[i]]));
      } else if (gsize) {
        PetscCall(PetscSectionSetDof(p->tpwgs[l], 0, 1));
      }
      PetscCall(PetscSectionSetUp(p->tpwgs[l]));
      if (!l) break; /* level 0 has no parent level to propagate to */

      /* determine number of processes shared by two consecutive levels */
      PetscCallMPI(MPI_Group_intersection(p->lgroup[l], p->lgroup[l - 1], &igroup));
      PetscCallMPI(MPI_Group_size(igroup, &isize));
      PetscCallMPI(MPI_Group_free(&igroup));
      if (!p->ppart[l] && touched) isize = 1; /* if this process gets touched, needs to propagate its data from one level to the other */

      /* reduce on level partitioner comm the size of the max size of the igroup */
      PetscCallMPI(MPIU_Allreduce(&isize, &isizer, 1, MPI_INT, MPI_MAX, pcomm));

      /* sum previously computed partition weights on the level comm */
      wgt  = lwgts[rank];
      wgtr = wgt;
      PetscCallMPI(MPIU_Allreduce(&wgt, &wgtr, 1, MPIU_INT, MPI_SUM, pcomm));

      /* partition weights are given with integers; to properly compute these and be able to propagate them to the next level,
         we need to compute the least common multiple of isizer across the global comm */
      a = isizer ? isizer : 1;
      b = a;
      PetscCallMPI(MPIU_Allreduce(&a, &b, 1, MPIU_INT, MPIU_LCM, comm));

      /* finally share this process weight with all the other processes */
      wgt = isizer ? (b * wgtr) / isizer : 0;
      PetscCallMPI(MPI_Allgather(&wgt, 1, MPIU_INT, lwgts, 1, MPIU_INT, comm));
    }
    PetscCall(PetscFree(lwgts));
    PetscCall(PetscFree(tranks));
    PetscCall(PetscFree(lparts));
    PetscCallMPI(MPI_Op_free(&MPIU_LCM));
  }
  PetscCallMPI(MPI_Group_free(&wgroup));
  PetscCall(PetscLogEventEnd(PetscPartitioner_MS_SetUp, part, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

531: PetscErrorCode PetscPartitionerMultistageGetStages_Multistage(PetscPartitioner part, PetscInt *levels, MPI_Group *lgroup[])
532: {
533:   PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;

535:   PetscFunctionBegin;
536:   PetscCheckTypeName(part, PETSCPARTITIONERMULTISTAGE);
538:   if (levels) *levels = p->levels;
539:   if (lgroup) *lgroup = p->lgroup;
540:   PetscFunctionReturn(PETSC_SUCCESS);
541: }

543: PetscErrorCode PetscPartitionerMultistageSetStage_Multistage(PetscPartitioner part, PetscInt stage, PetscObject odm)
544: {
545:   DM                   dm = (DM)odm;
546:   PetscPartitioner_MS *p  = (PetscPartitioner_MS *)part->data;

548:   PetscFunctionBegin;
549:   PetscCheckTypeName(part, PETSCPARTITIONERMULTISTAGE);
550:   PetscCheck(p->levels, PetscObjectComm((PetscObject)part), PETSC_ERR_ORDER, "Number of stages not set yet");
551:   PetscCheck(stage >= 0 && stage < p->levels, PetscObjectComm((PetscObject)part), PETSC_ERR_ARG_OUTOFRANGE, "Invalid stage index %" PetscInt_FMT ", not in [0,%" PetscInt_FMT ")", stage, p->levels);

553:   p->stage = stage;
554:   PetscCall(PetscObjectReference((PetscObject)dm));
555:   PetscCall(DMDestroy(&p->stagedm));
556:   p->stagedm = dm;
557:   PetscFunctionReturn(PETSC_SUCCESS);
558: }

560: PetscErrorCode PetscPartitionerMultistageGetStage_Multistage(PetscPartitioner part, PetscInt *stage, PetscObject *odm)
561: {
562:   PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;

564:   PetscFunctionBegin;
565:   PetscCheckTypeName(part, PETSCPARTITIONERMULTISTAGE);
566:   if (stage) *stage = p->stage;
567:   if (odm) *odm = (PetscObject)p->stagedm;
568:   PetscFunctionReturn(PETSC_SUCCESS);
569: }

571: static PetscErrorCode PetscPartitionerSetUp_Multistage(PetscPartitioner part)
572: {
573:   PetscPartitioner_MS *p    = (PetscPartitioner_MS *)part->data;
574:   MPI_Comm             comm = PetscObjectComm((PetscObject)part);
575:   PetscInt             nstages;
576:   MPI_Group           *groups;

578:   PetscFunctionBegin;
579:   if (p->levels) PetscFunctionReturn(PETSC_SUCCESS);
580:   PetscCall(PetscPartitionerMultistage_CreateStages(comm, NULL, &nstages, &groups));
581:   PetscCall(PetscPartitionerMultistageSetStages(part, nstages, groups));
582:   PetscCall(PetscPartitionerMultistage_DestroyStages(nstages, &groups));
583:   PetscFunctionReturn(PETSC_SUCCESS);
584: }

586: /* targetSection argument unused, target partition weights are computed internally */
587: static PetscErrorCode PetscPartitionerPartition_Multistage(PetscPartitioner part, PetscInt nparts, PetscInt numVertices, PetscInt start[], PetscInt adjacency[], PetscSection vertSection, PetscSection edgeSection, PETSC_UNUSED PetscSection targetSection, PetscSection partSection, IS *partition)
588: {
589:   PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;
590:   PetscPartitioner     ppart;
591:   PetscSection         ppartSection = NULL;
592:   IS                   lpartition;
593:   const PetscInt      *idxs;
594:   MPI_Comm             comm, pcomm;
595:   MPI_Group            group, lgroup, pgroup;
596:   PetscInt            *pstart, *padjacency;
597:   PetscInt             nid, i, pedgeCut;
598:   PetscMPIInt          sameComm, pparts;
599:   PetscBool            freeadj = PETSC_FALSE;

601:   PetscFunctionBegin;
602:   PetscCheck(p->levels, PetscObjectComm((PetscObject)part), PETSC_ERR_ORDER, "Number of stages not set yet");
603:   PetscCheck(p->stage >= 0 && p->stage < p->levels, PetscObjectComm((PetscObject)part), PETSC_ERR_ARG_OUTOFRANGE, "Invalid stage index %" PetscInt_FMT ", not in [0,%" PetscInt_FMT ")", p->stage, p->levels);
604:   PetscCall(PetscLogEventBegin(PetscPartitioner_MS_Stage[PetscMin(p->stage, PETSCPARTITIONER_MS_MAXSTAGE)], part, 0, 0, 0));

606:   /* Group for current stage (size of the group defines number of "local" partitions) */
607:   lgroup = p->lgroup[p->stage];
608:   PetscCallMPI(MPI_Group_size(lgroup, &pparts));

610:   /* Current stage partitioner */
611:   ppart = p->ppart[p->stage];
612:   if (ppart) {
613:     PetscCall(PetscObjectGetComm((PetscObject)ppart, &pcomm));
614:     PetscCallMPI(MPI_Comm_group(pcomm, &pgroup));
615:   } else {
616:     pcomm  = PETSC_COMM_SELF;
617:     pgroup = MPI_GROUP_EMPTY;
618:     pparts = -1;
619:   }

621:   /* Global comm of partitioner */
622:   PetscCall(PetscObjectGetComm((PetscObject)part, &comm));
623:   PetscCallMPI(MPI_Comm_group(comm, &group));

625:   /* Get adjacency */
626:   PetscCallMPI(MPI_Group_compare(group, pgroup, &sameComm));
627:   if (sameComm != MPI_UNEQUAL) {
628:     pstart     = start;
629:     padjacency = adjacency;
630:   } else { /* restrict to partitioner comm */
631:     ISLocalToGlobalMapping l2g, g2l;
632:     IS                     gid, rid;
633:     const PetscInt        *idxs1;
634:     PetscInt              *idxs2;
635:     PetscInt               cStart, cEnd, cum;
636:     DM                     dm = p->stagedm;
637:     PetscSF                sf;

639:     PetscCheck(dm, PetscObjectComm((PetscObject)part), PETSC_ERR_PLIB, "Missing stage DM");
640:     PetscCall(DMGetPointSF(dm, &sf));
641:     PetscCall(DMPlexGetHeightStratum(dm, part->height, &cStart, &cEnd));
642:     PetscCall(DMPlexCreateNumbering_Plex(dm, cStart, cEnd, part->height, NULL, sf, &gid));
643:     /* filter overlapped local cells (if any) */
644:     PetscCall(ISGetIndices(gid, &idxs1));
645:     PetscCall(ISGetLocalSize(gid, &cum));
646:     PetscCall(PetscMalloc1(cum, &idxs2));
647:     for (i = cStart, cum = 0; i < cEnd; i++) {
648:       if (idxs1[i - cStart] < 0) continue;
649:       idxs2[cum++] = idxs1[i - cStart];
650:     }
651:     PetscCall(ISRestoreIndices(gid, &idxs1));

653:     PetscCheck(numVertices == cum, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected %" PetscInt_FMT " != %" PetscInt_FMT, numVertices, cum);
654:     PetscCall(ISDestroy(&gid));

656:     /*
657:        g2l from full numbering to local numbering
658:        l2g from local numbering to restricted numbering
659:     */
660:     PetscCall(ISCreateGeneral(pcomm, numVertices, idxs2, PETSC_OWN_POINTER, &gid));
661:     PetscCall(ISRenumber(gid, NULL, NULL, &rid));
662:     PetscCall(ISLocalToGlobalMappingCreateIS(gid, &g2l));
663:     PetscCall(ISLocalToGlobalMappingSetType(g2l, ISLOCALTOGLOBALMAPPINGHASH));
664:     PetscCall(ISLocalToGlobalMappingCreateIS(rid, &l2g));
665:     PetscCall(ISDestroy(&gid));
666:     PetscCall(ISDestroy(&rid));

668:     PetscCall(PetscMalloc2(numVertices + 1, &pstart, start[numVertices], &padjacency));
669:     pstart[0] = 0;
670:     for (i = 0; i < numVertices; i++) {
671:       PetscCall(ISGlobalToLocalMappingApply(g2l, IS_GTOLM_DROP, start[i + 1] - start[i], adjacency + start[i], &pstart[i + 1], padjacency + pstart[i]));
672:       PetscCall(ISLocalToGlobalMappingApply(l2g, pstart[i + 1], padjacency + pstart[i], padjacency + pstart[i]));
673:       pstart[i + 1] += pstart[i];
674:     }
675:     PetscCall(ISLocalToGlobalMappingDestroy(&l2g));
676:     PetscCall(ISLocalToGlobalMappingDestroy(&g2l));

678:     freeadj = PETSC_TRUE;
679:   }

681:   /* Compute partitioning */
682:   pedgeCut = 0;
683:   PetscCall(PetscSectionCreate(pcomm, &ppartSection));
684:   if (ppart) {
685:     PetscMPIInt prank;

687:     PetscCheck(ppart->ops->partition, PetscObjectComm((PetscObject)ppart), PETSC_ERR_ARG_WRONGSTATE, "PetscPartitioner has no partitioning method on stage %" PetscInt_FMT, p->stage);
688:     PetscCall(PetscPartitionerPartition(ppart, pparts, numVertices, pstart, padjacency, vertSection, edgeSection, p->tpwgs[p->stage], ppartSection, &lpartition));
689:     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)ppart), &prank));
690:     if (!prank) pedgeCut = ppart->edgeCut; /* only the master rank will reduce */
691:   } else {                                 /* not participating */
692:     PetscCall(ISCreateStride(PETSC_COMM_SELF, numVertices, 0, 1, &lpartition));
693:     pparts = numVertices > 0 ? 1 : 0;
694:   }
695:   if (freeadj) PetscCall(PetscFree2(pstart, padjacency));

697:   /* Init final partition (output) */
698:   PetscCall(PetscSectionReset(partSection));
699:   PetscCall(PetscSectionSetChart(partSection, 0, nparts));

701:   /* We need to map the section back to the global comm numbering */
702:   for (i = 0; i < pparts; i++) {
703:     PetscInt    dof;
704:     PetscMPIInt mp, mpt;

706:     if (lgroup != MPI_GROUP_EMPTY) {
707:       PetscCall(PetscMPIIntCast(i, &mp));
708:       PetscCall(PetscSectionGetDof(ppartSection, i, &dof));
709:       PetscCallMPI(MPI_Group_translate_ranks(lgroup, 1, &mp, group, &mpt));
710:     } else {
711:       PetscCheck(pparts == 1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Unexpected pparts %d", pparts);
712:       PetscCallMPI(MPI_Comm_rank(comm, &mpt));
713:       PetscCall(ISGetLocalSize(lpartition, &dof));
714:     }
715:     PetscCall(PetscSectionSetDof(partSection, mpt, dof));
716:   }
717:   PetscCall(PetscSectionSetUp(partSection));
718:   PetscCall(PetscSectionDestroy(&ppartSection));

720:   /* No need to translate the "partition" output, as it is in local cell numbering
721:      we only change the comm of the index set */
722:   PetscCall(ISGetIndices(lpartition, &idxs));
723:   PetscCall(ISGetLocalSize(lpartition, &nid));
724:   PetscCall(ISCreateGeneral(comm, nid, idxs, PETSC_COPY_VALUES, partition));
725:   PetscCall(ISRestoreIndices(lpartition, &idxs));
726:   PetscCall(ISDestroy(&lpartition));

728:   PetscCall(PetscSectionViewFromOptions(partSection, (PetscObject)part, "-petscpartitioner_multistage_partition_view"));
729:   PetscCall(ISViewFromOptions(*partition, (PetscObject)part, "-petscpartitioner_multistage_partition_view"));

731:   /* Update edge-cut */
732:   p->edgeCut[p->stage] = pedgeCut;
733:   for (i = p->stage + 1; i < p->levels; i++) p->edgeCut[i] = 0;
734:   for (i = 0; i < p->stage; i++) pedgeCut += p->edgeCut[i];
735:   part->edgeCut = -1;
736:   PetscCallMPI(MPI_Reduce(&pedgeCut, &part->edgeCut, 1, MPIU_INT, MPI_SUM, 0, comm));

738:   PetscCallMPI(MPI_Group_free(&group));
739:   if (pgroup != MPI_GROUP_EMPTY) PetscCallMPI(MPI_Group_free(&pgroup));
740:   PetscCall(PetscLogEventEnd(PetscPartitioner_MS_Stage[PetscMin(p->stage, PETSCPARTITIONER_MS_MAXSTAGE)], part, 0, 0, 0));
741:   PetscFunctionReturn(PETSC_SUCCESS);
742: }

744: static PetscErrorCode PetscPartitionerSetFromOptions_Multistage(PetscPartitioner part, PetscOptionItems PetscOptionsObject)
745: {
746:   PetscPartitioner_MS *p        = (PetscPartitioner_MS *)part->data;
747:   PetscEnum            strategy = (PetscEnum)PETSCPARTITIONER_MS_STRATEGY_NODE;
748:   PetscBool            set, roundrobin;
749:   PetscInt             options[3] = {PETSC_DETERMINE, PETSC_DETERMINE, PETSC_DETERMINE};

751:   PetscFunctionBegin;
752:   PetscOptionsHeadBegin(PetscOptionsObject, "PetscPartitioner Multistate Options");
753:   PetscCall(PetscOptionsEnum("-petscpartitioner_multistage_strategy", "Default partitioning strategy", "", PetscPartitionerMultistageStrategyList, strategy, &strategy, &set));
754:   if (set || !p->levels) {
755:     options[0] = (PetscInt)strategy;
756:     switch (options[0]) {
757:     case PETSCPARTITIONER_MS_STRATEGY_NODE:
758:       options[1] = PETSC_DETERMINE;
759:       roundrobin = PETSC_FALSE;
760:       PetscCall(PetscOptionsInt("-petscpartitioner_multistage_node_size", "Number of processes per node", "", options[1], &options[1], NULL));
761:       PetscCall(PetscOptionsBool("-petscpartitioner_multistage_node_interleaved", "Use round robin rank assignments", "", roundrobin, &roundrobin, NULL));
762:       options[2] = (PetscInt)roundrobin;
763:       break;
764:     case PETSCPARTITIONER_MS_STRATEGY_MSECTION:
765:       options[1] = 2;
766:       PetscCall(PetscOptionsInt("-petscpartitioner_multistage_msection", "Number of splits per level", "", options[1], &options[1], NULL));
767:       break;
768:     default:
769:       break; /* LCOV_EXCL_LINE */
770:     }
771:   }
772:   PetscCall(PetscOptionsBool("-petscpartitioner_multistage_tpwgts", "Use target partition weights in stage partitioners", "", p->compute_tpwgs, &p->compute_tpwgs, NULL));
773:   PetscCall(PetscOptionsBool("-petscpartitioner_multistage_viewtpwgts", "View target partition weights", "", p->view_tpwgs, &p->view_tpwgs, NULL));
774:   PetscOptionsHeadEnd();

776:   if (options[0] != PETSC_DETERMINE) {
777:     MPI_Comm   comm = PetscObjectComm((PetscObject)part);
778:     PetscInt   nstages;
779:     MPI_Group *groups;

781:     PetscCall(PetscPartitionerMultistage_CreateStages(comm, options, &nstages, &groups));
782:     PetscCall(PetscPartitionerMultistageSetStages(part, nstages, groups));
783:     PetscCall(PetscPartitionerMultistage_DestroyStages(nstages, &groups));
784:   }

786:   {
787:     PetscInt nstages = p->levels, l;
788:     for (l = 0; l < nstages; l++) {
789:       if (p->ppart[l]) PetscCall(PetscPartitionerSetFromOptions(p->ppart[l]));
790:     }
791:   }
792:   PetscFunctionReturn(PETSC_SUCCESS);
793: }

795: /*MC
796:   PETSCPARTITIONERMULTISTAGE = "multistage" - A PetscPartitioner object using a multistage distribution strategy

798:   Level: intermediate

800:   Options Database Keys:
801: +  -petscpartitioner_multistage_strategy <strategy> - Either `PETSCPARTITIONER_MS_STRATEGY_NODE`, or `PETSCPARTITIONER_MS_STRATEGY_MSECTION`
802: .  -petscpartitioner_multistage_node_size <int> - Number of processes per computing node (or `PETSC_DECIDE`)
803: .  -petscpartitioner_multistage_node_interleaved <bool> - Assign ranks round-robin.
804: .  -petscpartitioner_multistage_msection <int> - Number of splits per level in recursive m-section splits (use `2` for bisection)
805: -  -petscpartitioner_multistage_tpwgts <bool> - Use target partition weights in stage partitioner

807:   Notes:
  The default multistage strategy uses `PETSCPARTITIONER_MS_STRATEGY_NODE` and automatically discovers node information using `MPI_Comm_split_type`.
809:   `PETSCPARTITIONER_MS_STRATEGY_MSECTION` is more for testing purposes.
  Options for the single-stage partitioners are prefixed with `-petscpartitioner_multistage_levels_`.
811:   For example, to use parmetis in all stages, `-petscpartitioner_multistage_levels_petscpartitioner_type parmetis`
812:   Finer grained control is also possible: `-petscpartitioner_multistage_levels_0_petscpartitioner_type parmetis`, `-petscpartitioner_multistage_levels_1_petscpartitioner_type simple`

814: .seealso: `PetscPartitionerType`, `PetscPartitionerCreate()`, `PetscPartitionerSetType()`
815: M*/

817: PETSC_EXTERN PetscErrorCode PetscPartitionerCreate_Multistage(PetscPartitioner part)
818: {
819:   PetscPartitioner_MS *p;

821:   PetscFunctionBegin;
823:   PetscCall(PetscNew(&p));
824:   p->compute_tpwgs = PETSC_TRUE;

826:   part->data                = p;
827:   part->ops->view           = PetscPartitionerView_Multistage;
828:   part->ops->destroy        = PetscPartitionerDestroy_Multistage;
829:   part->ops->partition      = PetscPartitionerPartition_Multistage;
830:   part->ops->setfromoptions = PetscPartitionerSetFromOptions_Multistage;
831:   part->ops->setup          = PetscPartitionerSetUp_Multistage;

833:   PetscCall(PetscCitationsRegister(MSPartitionerCitation, &MSPartitionerCite));
834:   PetscFunctionReturn(PETSC_SUCCESS);
835: }