Actual source code: sfbasic.c

  1: #include <../src/vec/is/sf/impls/basic/sfbasic.h>
  2: #include <../src/vec/is/sf/impls/basic/sfpack.h>
  3: #include <petsc/private/viewerimpl.h>

  5: /*===================================================================================*/
  6: /*              SF public interface implementations                                  */
  7: /*===================================================================================*/
  8: PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
  9: {
 10:   PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;
 11:   PetscInt      *rlengths, *ilengths, i, nRemoteRootRanks, nRemoteLeafRanks;
 12:   PetscMPIInt    rank, niranks, *iranks, tag;
 13:   MPI_Comm       comm;
 14:   MPI_Group      group;
 15:   MPI_Request   *rootreqs, *leafreqs;

 17:   PetscFunctionBegin;
 18:   PetscCallMPI(MPI_Comm_group(PETSC_COMM_SELF, &group));
 19:   PetscCall(PetscSFSetUpRanks(sf, group));
 20:   PetscCallMPI(MPI_Group_free(&group));
 21:   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
 22:   PetscCall(PetscObjectGetNewTag((PetscObject)sf, &tag));
 23:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
 24:   /*
 25:    * Inform roots about how many leaves reference them and from which ranks; a sketch of this rendezvous follows the function
 26:    */
 27:   PetscCall(PetscMalloc1(sf->nranks, &rlengths));
 28:   /* Determine the number of incoming ranks, which ranks they are, and the length of each incoming message */
 29:   for (i = 0; i < sf->nranks; i++) { rlengths[i] = sf->roffset[i + 1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */ }
 30:   nRemoteRootRanks = sf->nranks - sf->ndranks;
 31:   PetscCall(PetscCommBuildTwoSided(comm, 1, MPIU_INT, nRemoteRootRanks, sf->ranks + sf->ndranks, rlengths + sf->ndranks, &niranks, &iranks, (void **)&ilengths));

 33:   /* Sort iranks. See the use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() for why.
 34:      We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
 35:      small and the sorting is cheap.
 36:    */
 37:   PetscCall(PetscSortMPIIntWithIntArray(niranks, iranks, ilengths));

 39:   /* Partition into distinguished and non-distinguished incoming ranks */
 40:   bas->ndiranks = sf->ndranks;
 41:   bas->niranks  = bas->ndiranks + niranks;
 42:   PetscCall(PetscMalloc2(bas->niranks, &bas->iranks, bas->niranks + 1, &bas->ioffset));
 43:   bas->ioffset[0] = 0;
 44:   for (i = 0; i < bas->ndiranks; i++) {
 45:     bas->iranks[i]      = sf->ranks[i];
 46:     bas->ioffset[i + 1] = bas->ioffset[i] + rlengths[i];
 47:   }
 48:   PetscCheck(bas->ndiranks <= 1 && (bas->ndiranks != 1 || bas->iranks[0] == rank), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Broken setup for shared ranks");
 49:   for (; i < bas->niranks; i++) {
 50:     bas->iranks[i]      = iranks[i - bas->ndiranks];
 51:     bas->ioffset[i + 1] = bas->ioffset[i] + ilengths[i - bas->ndiranks];
 52:   }
 53:   bas->itotal = bas->ioffset[i];
 54:   PetscCall(PetscFree(rlengths));
 55:   PetscCall(PetscFree(iranks));
 56:   PetscCall(PetscFree(ilengths));

 58:   /* Send leaf identities to roots */
 59:   nRemoteLeafRanks = bas->niranks - bas->ndiranks;
 60:   PetscCall(PetscMalloc1(bas->itotal, &bas->irootloc));
 61:   PetscCall(PetscMalloc2(nRemoteLeafRanks, &rootreqs, nRemoteRootRanks, &leafreqs));
 62:   for (i = bas->ndiranks; i < bas->niranks; i++) PetscCallMPI(MPIU_Irecv(bas->irootloc + bas->ioffset[i], bas->ioffset[i + 1] - bas->ioffset[i], MPIU_INT, bas->iranks[i], tag, comm, &rootreqs[i - bas->ndiranks]));
 63:   for (i = 0; i < sf->nranks; i++) {
 64:     PetscInt npoints = sf->roffset[i + 1] - sf->roffset[i];
 65:     if (i < sf->ndranks) {
 66:       PetscCheck(sf->ranks[i] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished leaf rank");
 67:       PetscCheck(bas->iranks[0] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished root rank");
 68:       PetscCheck(npoints == bas->ioffset[1] - bas->ioffset[0], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Distinguished rank exchange has mismatched lengths");
 69:       PetscCall(PetscArraycpy(bas->irootloc + bas->ioffset[0], sf->rremote + sf->roffset[i], npoints));
 70:       continue;
 71:     }
 72:     PetscCallMPI(MPIU_Isend(sf->rremote + sf->roffset[i], npoints, MPIU_INT, sf->ranks[i], tag, comm, &leafreqs[i - sf->ndranks]));
 73:   }
 74:   PetscCallMPI(MPI_Waitall(nRemoteLeafRanks, rootreqs, MPI_STATUSES_IGNORE));
 75:   PetscCallMPI(MPI_Waitall(nRemoteRootRanks, leafreqs, MPI_STATUSES_IGNORE));

 77:   sf->nleafreqs  = nRemoteRootRanks;
 78:   bas->nrootreqs = nRemoteLeafRanks;
 79:   sf->persistent = PETSC_TRUE;

 81:   /* Set up fields related to packing, such as rootbuflen[] */
 82:   PetscCall(PetscSFSetUpPackFields(sf));
 83:   PetscCall(PetscFree2(rootreqs, leafreqs));
 84:   PetscFunctionReturn(PETSC_SUCCESS);
 85: }
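/* Editor's note: the block below is NOT part of sfbasic.c. It is a minimal sketch of the rendezvous
   pattern used in PetscSFSetUp_Basic() above: each rank tells the root ranks referenced by its leaves
   how many roots it touches there (one PetscInt per destination), and PetscCommBuildTwoSided() reports
   back which ranks will send to this rank and the length from each. The function name and the
   toranks/tolengths parameters are illustrative assumptions, not library API. */
static PetscErrorCode ExampleTwoSidedLengths(MPI_Comm comm, PetscMPIInt nto, const PetscMPIInt toranks[], const PetscInt tolengths[])
{
  PetscMPIInt nfrom, *fromranks;
  PetscInt   *fromlengths;

  PetscFunctionBegin;
  /* Exchange one PetscInt (a message length) with every destination rank; PETSc allocates the output arrays */
  PetscCall(PetscCommBuildTwoSided(comm, 1, MPIU_INT, nto, toranks, tolengths, &nfrom, &fromranks, (void **)&fromlengths));
  for (PetscMPIInt i = 0; i < nfrom; i++) PetscCall(PetscPrintf(PETSC_COMM_SELF, "expect %" PetscInt_FMT " root indices from rank %d\n", fromlengths[i], fromranks[i]));
  PetscCall(PetscFree(fromranks));
  PetscCall(PetscFree(fromlengths));
  PetscFunctionReturn(PETSC_SUCCESS);
}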

 87: PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
 88: {
 89:   PetscSF_Basic *bas  = (PetscSF_Basic *)sf->data;
 90:   PetscSFLink    link = bas->avail, next;

 92:   PetscFunctionBegin;
 93:   PetscCheck(!bas->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Outstanding operation has not been completed");
 94:   PetscCall(PetscFree2(bas->iranks, bas->ioffset));
 95:   PetscCall(PetscFree(bas->irootloc));

 97: #if defined(PETSC_HAVE_DEVICE)
 98:   for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, bas->irootloc_d[i]));
 99: #endif

101: #if defined(PETSC_HAVE_NVSHMEM)
102:   PetscCall(PetscSFReset_Basic_NVSHMEM(sf));
103: #endif

105:   for (; link; link = next) {
106:     next = link->next;
107:     PetscCall(PetscSFLinkDestroy(sf, link));
108:   }
109:   bas->avail = NULL;
110:   PetscCall(PetscSFResetPackFields(sf));
111:   PetscFunctionReturn(PETSC_SUCCESS);
112: }

114: PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
115: {
116:   PetscFunctionBegin;
117:   PetscCall(PetscSFReset_Basic(sf));
118:   PetscCall(PetscFree(sf->data));
119:   PetscFunctionReturn(PETSC_SUCCESS);
120: }

122: #if defined(PETSC_USE_SINGLE_LIBRARY)
123: #include <petscmat.h>

125: PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf, PetscViewer viewer)
126: {
127:   PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
128:   PetscInt           i, nrootranks, ndrootranks;
129:   const PetscInt    *rootoffset;
130:   PetscMPIInt        rank, size;
131:   const PetscMPIInt *rootranks;
132:   MPI_Comm           comm = PetscObjectComm((PetscObject)sf);
133:   PetscScalar        unitbytes;
134:   Mat                A;

136:   PetscFunctionBegin;
137:   PetscCallMPI(MPI_Comm_size(comm, &size));
138:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
139:   /* PetscSFView is most useful for the SF used in VecScatterBegin/End in MatMult etc., where we do
140:     PetscSFBcast, i.e., roots send data to leaves.  We dump the communication pattern into a matrix
141:     from the senders' point of view: how many bytes I will send to each of my neighbors.

143:     Looking at a column of the matrix, one can also see how many bytes that rank will receive from others.

145:     If a PetscSFLink bas->inuse is available, we could use it to get the vertex (unit) size. But that would give
146:     different interpretations of the same SF for different data types. Since we care most about VecScatter,
147:     we uniformly treat each vertex as a PetscScalar.
148:   */
149:   unitbytes = (PetscScalar)sizeof(PetscScalar);

151:   PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, &rootranks, &rootoffset, NULL));
152:   PetscCall(MatCreateAIJ(comm, 1, 1, size, size, 1, NULL, nrootranks - ndrootranks, NULL, &A));
153:   PetscCall(MatSetOptionsPrefix(A, "__petsc_internal__")); /* To prevent the internal A from taking any command line options */
154:   for (i = 0; i < nrootranks; i++) PetscCall(MatSetValue(A, (PetscInt)rank, bas->iranks[i], (rootoffset[i + 1] - rootoffset[i]) * unitbytes, INSERT_VALUES));
155:   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
156:   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
157:   PetscCall(MatView(A, viewer));
158:   PetscCall(MatDestroy(&A));
159:   PetscFunctionReturn(PETSC_SUCCESS);
160: }
161: #endif

163: PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf, PetscViewer viewer)
164: {
165:   PetscBool isascii;

167:   PetscFunctionBegin;
168:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
169:   if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, "  MultiSF sort=%s\n", sf->rankorder ? "rank-order" : "unordered"));
170: #if defined(PETSC_USE_SINGLE_LIBRARY)
171:   else {
172:     PetscBool isdraw, isbinary;
173:     PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
174:     PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
175:     if ((isascii && viewer->format == PETSC_VIEWER_ASCII_MATLAB) || isdraw || isbinary) PetscCall(PetscSFView_Basic_PatternAndSizes(sf, viewer));
176:   }
177: #endif
178:   PetscFunctionReturn(PETSC_SUCCESS);
179: }
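/* Editor's note: not part of sfbasic.c. A minimal sketch of driving the viewer above: in the default ASCII
   format PetscSFView_Basic() adds only the MultiSF sort line, while pushing PETSC_VIEWER_ASCII_MATLAB
   (in single-library builds) dumps the per-rank byte counts as a size-by-size matrix via
   PetscSFView_Basic_PatternAndSizes(). It assumes sf lives on PETSC_COMM_WORLD; the function name is an
   illustrative assumption. */
static PetscErrorCode ExampleViewCommunicationPattern(PetscSF sf)
{
  PetscViewer viewer = PETSC_VIEWER_STDOUT_WORLD;

  PetscFunctionBegin;
  PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB));
  PetscCall(PetscSFView(sf, viewer)); /* dispatches to PetscSFView_Basic() for a PETSCSFBASIC sf */
  PetscCall(PetscViewerPopFormat(viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}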

181: static PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
182: {
183:   PetscSFLink link = NULL;

185:   PetscFunctionBegin;
186:   /* Create a communication link, which provides buffers, MPI requests etc (if MPI is used) */
187:   PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link));
188:   /* Pack rootdata to rootbuf for remote communication */
189:   PetscCall(PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata));
190:   /* Start communication, e.g., post MPI_Isend */
191:   PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF));
192:   /* Do the local scatter (i.e., self-to-self communication), which overlaps with the remote communication above */
193:   PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_ROOT2LEAF, (void *)rootdata, leafdata, op));
194:   PetscFunctionReturn(PETSC_SUCCESS);
195: }

197: PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
198: {
199:   PetscSFLink link = NULL;

201:   PetscFunctionBegin;
202:   /* Retrieve the link used in XxxBegin() with root/leafdata as key */
203:   PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
204:   /* Finish the remote communication, e.g., call MPI_Waitall */
205:   PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF));
206:   /* Unpack the remotely received data in leafbuf into leafdata */
207:   PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafdata, op));
208:   /* Recycle the link */
209:   PetscCall(PetscSFLinkReclaim(sf, &link));
210:   PetscFunctionReturn(PETSC_SUCCESS);
211: }
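/* Editor's note: not part of sfbasic.c. A minimal usage sketch of the Bcast pair implemented above: roots
   broadcast their value to every leaf that references them, and MPI_REPLACE overwrites leafdata rather than
   combining. rootdata/leafdata are assumed to be caller-sized to the sf's nroots and nleaves; the function
   name is an illustrative assumption. */
static PetscErrorCode ExampleBcastScalars(PetscSF sf, const PetscScalar *rootdata, PetscScalar *leafdata)
{
  PetscFunctionBegin;
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
  /* ... independent work may overlap with the communication here ... */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
  PetscFunctionReturn(PETSC_SUCCESS);
}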

213: /* Shared by ReduceBegin and FetchAndOpBegin */
214: static inline PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op, PetscSFOperation sfop, PetscSFLink *out)
215: {
216:   PetscSFLink link = NULL;

218:   PetscFunctionBegin;
219:   PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, sfop, &link));
220:   PetscCall(PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata));
221:   PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_LEAF2ROOT));
222:   *out = link;
223:   PetscFunctionReturn(PETSC_SUCCESS);
224: }

226: /* leaf -> root with reduction */
227: static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
228: {
229:   PetscSFLink link = NULL;

231:   PetscFunctionBegin;
232:   PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_REDUCE, &link));
233:   PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_LEAF2ROOT, rootdata, (void *)leafdata, op));
234:   PetscFunctionReturn(PETSC_SUCCESS);
235: }

237: PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
238: {
239:   PetscSFLink link = NULL;

241:   PetscFunctionBegin;
242:   PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
243:   PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT));
244:   PetscCall(PetscSFLinkUnpackRootData(sf, link, PETSCSF_REMOTE, rootdata, op));
245:   PetscCall(PetscSFLinkReclaim(sf, &link));
246:   PetscFunctionReturn(PETSC_SUCCESS);
247: }
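/* Editor's note: not part of sfbasic.c. A minimal usage sketch of the Reduce pair implemented above: each
   leaf contributes its value to the root it references, and contributions are combined into rootdata with
   the given MPI_Op (here a sum). rootdata/leafdata are assumed caller-sized; the function name is an
   illustrative assumption. */
static PetscErrorCode ExampleReduceSumScalars(PetscSF sf, const PetscScalar *leafdata, PetscScalar *rootdata)
{
  PetscFunctionBegin;
  PetscCall(PetscSFReduceBegin(sf, MPIU_SCALAR, leafdata, rootdata, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(sf, MPIU_SCALAR, leafdata, rootdata, MPIU_SUM));
  PetscFunctionReturn(PETSC_SUCCESS);
}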

249: PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op)
250: {
251:   PetscSFLink link = NULL;

253:   PetscFunctionBegin;
254:   PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_FETCH, &link));
255:   PetscCall(PetscSFLinkFetchAndOpLocal(sf, link, rootdata, leafdata, leafupdate, op));
256:   PetscFunctionReturn(PETSC_SUCCESS);
257: }

259: static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
260: {
261:   PetscSFLink link = NULL;

263:   PetscFunctionBegin;
264:   PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
265:   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
266:   PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT));
267:   /* Do fetch-and-op, the (remote) update results are in rootbuf */
268:   PetscCall(PetscSFLinkFetchAndOpRemote(sf, link, rootdata, op));
269:   /* Bcast rootbuf to leafupdate */
270:   PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF));
271:   PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF));
272:   /* Unpack and insert fetched data into leaves */
273:   PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafupdate, MPI_REPLACE));
274:   PetscCall(PetscSFLinkReclaim(sf, &link));
275:   PetscFunctionReturn(PETSC_SUCCESS);
276: }
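/* Editor's note: not part of sfbasic.c. A minimal sketch of the fetch-and-op pair implemented above, in its
   classic role of handing out offsets: each leaf atomically adds its count to the root counter and receives
   in leafoffset the counter's value before its own contribution (the ordering among leaves sharing a root is
   unspecified). The function name and the rootcounter/leafcount/leafoffset parameters are illustrative
   assumptions. */
static PetscErrorCode ExampleFetchAndAdd(PetscSF sf, PetscInt *rootcounter, const PetscInt *leafcount, PetscInt *leafoffset)
{
  PetscFunctionBegin;
  PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, rootcounter, leafcount, leafoffset, MPI_SUM));
  PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, rootcounter, leafcount, leafoffset, MPI_SUM));
  PetscFunctionReturn(PETSC_SUCCESS);
}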

278: PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
279: {
280:   PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;

282:   PetscFunctionBegin;
283:   if (niranks) *niranks = bas->niranks;
284:   if (iranks) *iranks = bas->iranks;
285:   if (ioffset) *ioffset = bas->ioffset;
286:   if (irootloc) *irootloc = bas->irootloc;
287:   PetscFunctionReturn(PETSC_SUCCESS);
288: }

290: /* An optimized PetscSFCreateEmbeddedRootSF. We aggressively make use of the communication already established on sf.
291:    We need one bcast on sf and no further communication to build the embedded sf. Note that selected[]
292:    is sorted before this routine is called. A usage sketch follows the function.
293:  */
294: PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
295: {
296:   PetscSF            esf;
297:   PetscInt           esf_nranks, esf_ndranks, *esf_roffset, *esf_rmine, *esf_rremote;
298:   PetscInt           i, j, p, q, nroots, esf_nleaves, *new_ilocal, nranks, ndranks, niranks, ndiranks, minleaf, maxleaf, maxlocal;
299:   char              *rootdata, *leafdata = NULL, *leafmem; /* Only stores 0 or 1, so we can save memory with char */
300:   PetscMPIInt       *esf_ranks;
301:   const PetscMPIInt *ranks, *iranks;
302:   const PetscInt    *roffset, *rmine, *rremote, *ioffset, *irootloc;
303:   PetscBool          connected;
304:   PetscSFNode       *new_iremote;
305:   PetscSF_Basic     *bas;

307:   PetscFunctionBegin;
308:   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), &esf));
309:   PetscCall(PetscSFSetFromOptions(esf));
310:   PetscCall(PetscSFSetType(esf, PETSCSFBASIC)); /* This optimized routine can only create a basic sf */

312:   /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */
313:   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
314:   PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
315:   maxlocal = maxleaf - minleaf + 1;
316:   PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
317:   if (leafmem) leafdata = leafmem - minleaf;
318:   /* Tag selected roots */
319:   for (i = 0; i < nselected; ++i) rootdata[selected[i]] = 1;

321:   PetscCall(PetscSFBcastBegin(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE));
322:   PetscCall(PetscSFBcastEnd(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE));
323:   PetscCall(PetscSFGetLeafInfo_Basic(sf, &nranks, &ndranks, &ranks, &roffset, &rmine, &rremote)); /* Get send info */
324:   esf_nranks = esf_ndranks = esf_nleaves = 0;
325:   for (i = 0; i < nranks; i++) {
326:     connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */
327:     for (j = roffset[i]; j < roffset[i + 1]; j++) {
328:       if (leafdata[rmine[j]]) {
329:         esf_nleaves++;
330:         connected = PETSC_TRUE;
331:       }
332:     }
333:     if (connected) {
334:       esf_nranks++;
335:       if (i < ndranks) esf_ndranks++;
336:     }
337:   }

339:   /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
340:   PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
341:   PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
342:   PetscCall(PetscMalloc4(esf_nranks, &esf_ranks, esf_nranks + 1, &esf_roffset, esf_nleaves, &esf_rmine, esf_nleaves, &esf_rremote));
343:   p              = 0; /* Counter for connected root ranks */
344:   q              = 0; /* Counter for connected leaves */
345:   esf_roffset[0] = 0;
346:   for (i = 0; i < nranks; i++) { /* Scan leaf data again to fill esf arrays */
347:     connected = PETSC_FALSE;
348:     for (j = roffset[i]; j < roffset[i + 1]; j++) {
349:       if (leafdata[rmine[j]]) {
350:         esf_rmine[q] = new_ilocal[q] = rmine[j];
351:         esf_rremote[q]               = rremote[j];
352:         new_iremote[q].index         = rremote[j];
353:         new_iremote[q].rank          = ranks[i];
354:         connected                    = PETSC_TRUE;
355:         q++;
356:       }
357:     }
358:     if (connected) {
359:       esf_ranks[p]       = ranks[i];
360:       esf_roffset[p + 1] = q;
361:       p++;
362:     }
363:   }

365:   /* SetGraph internally resets the SF, so we only set its fields after the call */
366:   PetscCall(PetscSFSetGraph(esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
367:   esf->nranks    = esf_nranks;
368:   esf->ndranks   = esf_ndranks;
369:   esf->ranks     = esf_ranks;
370:   esf->roffset   = esf_roffset;
371:   esf->rmine     = esf_rmine;
372:   esf->rremote   = esf_rremote;
373:   esf->nleafreqs = esf_nranks - esf_ndranks;

375:   /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
376:   bas = (PetscSF_Basic *)esf->data;
377:   PetscCall(PetscSFGetRootInfo_Basic(sf, &niranks, &ndiranks, &iranks, &ioffset, &irootloc)); /* Get recv info */
378:   /* The embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here, but
379:      we do not care since these arrays are usually short. The benefit is that we can fill these arrays by parsing irootloc just once.
380:    */
381:   PetscCall(PetscMalloc2(niranks, &bas->iranks, niranks + 1, &bas->ioffset));
382:   PetscCall(PetscMalloc1(ioffset[niranks], &bas->irootloc));
383:   bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
384:   p                                              = 0; /* Counter for connected leaf ranks */
385:   q                                              = 0; /* Counter for connected roots */
386:   for (i = 0; i < niranks; i++) {
387:     connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
388:     for (j = ioffset[i]; j < ioffset[i + 1]; j++) {
389:       if (rootdata[irootloc[j]]) {
390:         bas->irootloc[q++] = irootloc[j];
391:         connected          = PETSC_TRUE;
392:       }
393:     }
394:     if (connected) {
395:       bas->niranks++;
396:       if (i < ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
397:       bas->iranks[p]      = iranks[i];
398:       bas->ioffset[p + 1] = q;
399:       p++;
400:     }
401:   }
402:   bas->itotal     = q;
403:   bas->nrootreqs  = bas->niranks - bas->ndiranks;
404:   esf->persistent = PETSC_TRUE;
405:   /* Set up packing-related fields */
406:   PetscCall(PetscSFSetUpPackFields(esf));

408:   /* Copied from PetscSFSetUp(), since this routine intentionally skips PetscSFSetUp(). */
409: #if defined(PETSC_HAVE_CUDA)
410:   if (esf->backend == PETSCSF_BACKEND_CUDA) {
411:     esf->ops->Malloc = PetscSFMalloc_CUDA;
412:     esf->ops->Free   = PetscSFFree_CUDA;
413:   }
414: #endif

416: #if defined(PETSC_HAVE_HIP)
417:   /* TODO: Needs debugging */
418:   if (esf->backend == PETSCSF_BACKEND_HIP) {
419:     esf->ops->Malloc = PetscSFMalloc_HIP;
420:     esf->ops->Free   = PetscSFFree_HIP;
421:   }
422: #endif

424: #if defined(PETSC_HAVE_KOKKOS)
425:   if (esf->backend == PETSCSF_BACKEND_KOKKOS) {
426:     esf->ops->Malloc = PetscSFMalloc_Kokkos;
427:     esf->ops->Free   = PetscSFFree_Kokkos;
428:   }
429: #endif
430:   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
431:   PetscCall(PetscFree2(rootdata, leafmem));
432:   *newsf = esf;
433:   PetscFunctionReturn(PETSC_SUCCESS);
434: }
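/* Editor's note: not part of sfbasic.c. A minimal sketch of the public entry point that ends up in the
   optimized routine above for a PETSCSFBASIC sf: select a sorted subset of local roots (here the
   even-indexed ones) and build the embedded sf that keeps only the leaves still connected to the selection.
   It assumes sf is already set up; the function name is an illustrative assumption. */
static PetscErrorCode ExampleEmbedEvenRoots(PetscSF sf, PetscSF *esf)
{
  PetscInt nroots, nselected = 0, *selected;

  PetscFunctionBegin;
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  PetscCall(PetscMalloc1(nroots, &selected));
  for (PetscInt i = 0; i < nroots; i += 2) selected[nselected++] = i; /* increasing, i.e., already sorted as required */
  PetscCall(PetscSFCreateEmbeddedRootSF(sf, nselected, selected, esf)); /* *esf must later be freed with PetscSFDestroy() */
  PetscCall(PetscFree(selected));
  PetscFunctionReturn(PETSC_SUCCESS);
}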

436: PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
437: {
438:   PetscSF_Basic *dat;

440:   PetscFunctionBegin;
441:   sf->ops->SetUp                = PetscSFSetUp_Basic;
442:   sf->ops->Reset                = PetscSFReset_Basic;
443:   sf->ops->Destroy              = PetscSFDestroy_Basic;
444:   sf->ops->View                 = PetscSFView_Basic;
445:   sf->ops->BcastBegin           = PetscSFBcastBegin_Basic;
446:   sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
447:   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
448:   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
449:   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
450:   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
451:   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
452:   sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;

454:   PetscCall(PetscNew(&dat));
455:   sf->data = (void *)dat;
456:   PetscFunctionReturn(PETSC_SUCCESS);
457: }
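/* Editor's note: not part of sfbasic.c. A minimal sketch of the lifecycle that exercises the function table
   installed by PetscSFCreate_Basic() above: create an sf of type PETSCSFBASIC, describe a star graph in
   which every rank owns one root and one leaf pointing at root 0 of rank 0, and set it up. The function
   name is an illustrative assumption. */
static PetscErrorCode ExampleCreateStarSF(MPI_Comm comm, PetscSF *newsf)
{
  PetscSF     sf;
  PetscSFNode iremote;

  PetscFunctionBegin;
  PetscCall(PetscSFCreate(comm, &sf));
  PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* installs the ops set in PetscSFCreate_Basic() */
  iremote.rank  = 0; /* every leaf references root 0 owned by rank 0 */
  iremote.index = 0;
  PetscCall(PetscSFSetGraph(sf, 1, 1, NULL, PETSC_COPY_VALUES, &iremote, PETSC_COPY_VALUES));
  PetscCall(PetscSFSetUp(sf)); /* dispatches to PetscSFSetUp_Basic() */
  *newsf = sf;
  PetscFunctionReturn(PETSC_SUCCESS);
}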