Actual source code: sfbasic.c
#include <../src/vec/is/sf/impls/basic/sfbasic.h>
#include <../src/vec/is/sf/impls/basic/sfpack.h>
#include <petsc/private/viewerimpl.h>

/*===================================================================================*/
/* SF public interface implementations */
/*===================================================================================*/
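/* Build the root-side (incoming) communication info for the basic SF: determine which ranks own
   leaves that reference roots on this process, and which local root indices they reference
   (stored in bas->irootloc[]). This complements the leaf-side (outgoing) info computed by
   PetscSFSetUpRanks(). */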
PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
{
  PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;
  PetscInt      *rlengths, *ilengths, i, nRemoteRootRanks, nRemoteLeafRanks;
  PetscMPIInt    rank, niranks, *iranks, tag;
  MPI_Comm       comm;
  MPI_Group      group;
  MPI_Request   *rootreqs, *leafreqs;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_group(PETSC_COMM_SELF, &group));
  PetscCall(PetscSFSetUpRanks(sf, group));
  PetscCallMPI(MPI_Group_free(&group));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscObjectGetNewTag((PetscObject)sf, &tag));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /*
   * Inform roots about how many leaves reference them and from which ranks
   */
  PetscCall(PetscMalloc1(sf->nranks, &rlengths));
  /* Determine the number of incoming messages, the sending ranks, and the message lengths */
  for (i = 0; i < sf->nranks; i++) { rlengths[i] = sf->roffset[i + 1] - sf->roffset[i]; /* Number of roots referenced by my leaves, for rank sf->ranks[i] */ }
  nRemoteRootRanks = sf->nranks - sf->ndranks;
  PetscCall(PetscCommBuildTwoSided(comm, 1, MPIU_INT, nRemoteRootRanks, sf->ranks + sf->ndranks, rlengths + sf->ndranks, &niranks, &iranks, (void **)&ilengths));
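  /* After the exchange, iranks[0..niranks) are the remote ranks whose leaves reference roots owned by
     this process, and ilengths[i] is how many root points rank iranks[i] references */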

  /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() for why.
     We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
     small and the sorting is cheap.
  */
  PetscCall(PetscSortMPIIntWithIntArray(niranks, iranks, ilengths));

  /* Partition into distinguished and non-distinguished incoming ranks */
  bas->ndiranks = sf->ndranks;
  bas->niranks  = bas->ndiranks + niranks;
  PetscCall(PetscMalloc2(bas->niranks, &bas->iranks, bas->niranks + 1, &bas->ioffset));
  bas->ioffset[0] = 0;
  for (i = 0; i < bas->ndiranks; i++) {
    bas->iranks[i]      = sf->ranks[i];
    bas->ioffset[i + 1] = bas->ioffset[i] + rlengths[i];
  }
  PetscCheck(bas->ndiranks <= 1 && (bas->ndiranks != 1 || bas->iranks[0] == rank), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Broken setup for shared ranks");
  for (; i < bas->niranks; i++) {
    bas->iranks[i]      = iranks[i - bas->ndiranks];
    bas->ioffset[i + 1] = bas->ioffset[i] + ilengths[i - bas->ndiranks];
  }
  bas->itotal = bas->ioffset[i];
  PetscCall(PetscFree(rlengths));
  PetscCall(PetscFree(iranks));
  PetscCall(PetscFree(ilengths));

  /* Send leaf identities to roots */
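  /* Each remote leaf rank sends the local indices of our roots that its leaves reference; they are
     received into bas->irootloc[], grouped per incoming rank by bas->ioffset[] */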
  nRemoteLeafRanks = bas->niranks - bas->ndiranks;
  PetscCall(PetscMalloc1(bas->itotal, &bas->irootloc));
  PetscCall(PetscMalloc2(nRemoteLeafRanks, &rootreqs, nRemoteRootRanks, &leafreqs));
  for (i = bas->ndiranks; i < bas->niranks; i++) PetscCallMPI(MPIU_Irecv(bas->irootloc + bas->ioffset[i], bas->ioffset[i + 1] - bas->ioffset[i], MPIU_INT, bas->iranks[i], tag, comm, &rootreqs[i - bas->ndiranks]));
  for (i = 0; i < sf->nranks; i++) {
    PetscInt npoints = sf->roffset[i + 1] - sf->roffset[i];
    if (i < sf->ndranks) {
      PetscCheck(sf->ranks[i] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished leaf rank");
      PetscCheck(bas->iranks[0] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished root rank");
      PetscCheck(npoints == bas->ioffset[1] - bas->ioffset[0], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Distinguished rank exchange has mismatched lengths");
      PetscCall(PetscArraycpy(bas->irootloc + bas->ioffset[0], sf->rremote + sf->roffset[i], npoints));
      continue;
    }
    PetscCallMPI(MPIU_Isend(sf->rremote + sf->roffset[i], npoints, MPIU_INT, sf->ranks[i], tag, comm, &leafreqs[i - sf->ndranks]));
  }
  PetscCallMPI(MPI_Waitall(nRemoteLeafRanks, rootreqs, MPI_STATUSES_IGNORE));
  PetscCallMPI(MPI_Waitall(nRemoteRootRanks, leafreqs, MPI_STATUSES_IGNORE));

  sf->nleafreqs  = nRemoteRootRanks;
  bas->nrootreqs = nRemoteLeafRanks;
  sf->persistent = PETSC_TRUE;

  /* Set up fields related to packing, such as rootbuflen[] */
  PetscCall(PetscSFSetUpPackFields(sf));
  PetscCall(PetscFree2(rootreqs, leafreqs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
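
/* Free the root-side communication data built in PetscSFSetUp_Basic() and destroy all cached communication links */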
PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
{
  PetscSF_Basic *bas  = (PetscSF_Basic *)sf->data;
  PetscSFLink    link = bas->avail, next;

  PetscFunctionBegin;
  PetscCheck(!bas->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Outstanding operation has not been completed");
  PetscCall(PetscFree2(bas->iranks, bas->ioffset));
  PetscCall(PetscFree(bas->irootloc));

#if defined(PETSC_HAVE_DEVICE)
  for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, bas->irootloc_d[i]));
#endif

#if defined(PETSC_HAVE_NVSHMEM)
  PetscCall(PetscSFReset_Basic_NVSHMEM(sf));
#endif

  for (; link; link = next) {
    next = link->next;
    PetscCall(PetscSFLinkDestroy(sf, link));
  }
  bas->avail = NULL;
  PetscCall(PetscSFResetPackFields(sf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
{
  PetscFunctionBegin;
  PetscCall(PetscSFReset_Basic(sf));
  PetscCall(PetscFree(sf->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_USE_SINGLE_LIBRARY)
#include <petscmat.h>

PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf, PetscViewer viewer)
{
  PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
  PetscInt           i, nrootranks, ndrootranks;
  const PetscInt    *rootoffset;
  PetscMPIInt        rank, size;
  const PetscMPIInt *rootranks;
  MPI_Comm           comm = PetscObjectComm((PetscObject)sf);
  PetscScalar        unitbytes;
  Mat                A;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* PetscSFView is most useful for the SF used in VecScatterBegin/End in MatMult etc., where we do
     PetscSFBcast, i.e., roots send data to leaves. We dump the communication pattern into a matrix
     from the senders' viewpoint: how many bytes I will send to my neighbors.

     Looking at a column of the matrix, one can also see how many bytes the rank will receive from others.

     If the PetscSFLink bas->inuse is available, we could use it to get the tree vertex size. But that would give
     different interpretations of the same SF for different data types. Since we care most about VecScatter,
     we uniformly treat each vertex as a PetscScalar.
  */
  unitbytes = (PetscScalar)sizeof(PetscScalar);

  PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, &rootranks, &rootoffset, NULL));
  PetscCall(MatCreateAIJ(comm, 1, 1, size, size, 1, NULL, nrootranks - ndrootranks, NULL, &A));
  PetscCall(MatSetOptionsPrefix(A, "__petsc_internal__")); /* To prevent the internal A from taking any command line options */
  for (i = 0; i < nrootranks; i++) PetscCall(MatSetValue(A, (PetscInt)rank, bas->iranks[i], (rootoffset[i + 1] - rootoffset[i]) * unitbytes, INSERT_VALUES));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatView(A, viewer));
  PetscCall(MatDestroy(&A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
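
/* For ASCII viewers (except in MATLAB format), print the MultiSF sort mode; MATLAB-format ASCII, draw,
   and binary viewers (single-library builds only) instead dump the communication pattern and sizes */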
PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf, PetscViewer viewer)
{
  PetscBool isascii;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, " MultiSF sort=%s\n", sf->rankorder ? "rank-order" : "unordered"));
#if defined(PETSC_USE_SINGLE_LIBRARY)
  else {
    PetscBool isdraw, isbinary;
    PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
    PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
    if ((isascii && viewer->format == PETSC_VIEWER_ASCII_MATLAB) || isdraw || isbinary) PetscCall(PetscSFView_Basic_PatternAndSizes(sf, viewer));
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
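
/* root -> leaf broadcast: pack root data, start the remote communication, and overlap it with the local (self-to-self) scatter */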
static PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  /* Create a communication link, which provides buffers, MPI requests, etc. (if MPI is used) */
  PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link));
  /* Pack rootdata into rootbuf for remote communication */
  PetscCall(PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata));
  /* Start communication, e.g., post MPI_Isend */
  PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF));
  /* Do the local scatter (i.e., self-to-self communication), which overlaps with the remote communication above */
  PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_ROOT2LEAF, (void *)rootdata, leafdata, op));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  /* Retrieve the link used in XxxBegin() with root/leafdata as the key */
  PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
  /* Finish the remote communication, e.g., call MPI_Waitall */
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF));
  /* Unpack the remotely received data in leafbuf into leafdata */
  PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafdata, op));
  /* Recycle the link */
  PetscCall(PetscSFLinkReclaim(sf, &link));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Shared by ReduceBegin and FetchAndOpBegin */
static inline PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op, PetscSFOperation sfop, PetscSFLink *out)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, sfop, &link));
  PetscCall(PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata));
  PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_LEAF2ROOT));
  *out = link;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* leaf -> root with reduction */
static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_REDUCE, &link));
  PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_LEAF2ROOT, rootdata, (void *)leafdata, op));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT));
  PetscCall(PetscSFLinkUnpackRootData(sf, link, PETSCSF_REMOTE, rootdata, op));
  PetscCall(PetscSFLinkReclaim(sf, &link));
  PetscFunctionReturn(PETSC_SUCCESS);
}
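
/* leaf -> root fetch-and-op: reduce leaf values into root values while fetching the previous root values back into leafupdate */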
PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_FETCH, &link));
  PetscCall(PetscSFLinkFetchAndOpLocal(sf, link, rootdata, leafdata, leafupdate, op));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
  /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT));
  /* Do the fetch-and-op; the (remote) update results are left in rootbuf */
  PetscCall(PetscSFLinkFetchAndOpRemote(sf, link, rootdata, op));
  /* Bcast rootbuf to leafupdate */
  PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF));
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF));
  /* Unpack and insert the fetched data into the leaves */
  PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafupdate, MPI_REPLACE));
  PetscCall(PetscSFLinkReclaim(sf, &link));
  PetscFunctionReturn(PETSC_SUCCESS);
}
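
/* Return the root-side view of the communication: the ranks whose leaves reference our roots (iranks),
   per-rank offsets into irootloc (ioffset), and the local indices of the referenced roots (irootloc) */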
PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
{
  PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;

  PetscFunctionBegin;
  if (niranks) *niranks = bas->niranks;
  if (iranks) *iranks = bas->iranks;
  if (ioffset) *ioffset = bas->ioffset;
  if (irootloc) *irootloc = bas->irootloc;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* An optimized PetscSFCreateEmbeddedRootSF. We aggressively make use of the communication already established on sf:
   one bcast on sf suffices, and no further communication is needed to build the embedded sf. Note that selected[]
   has been sorted before this routine is called.
*/
PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
{
  PetscSF            esf;
  PetscInt           esf_nranks, esf_ndranks, *esf_roffset, *esf_rmine, *esf_rremote;
  PetscInt           i, j, p, q, nroots, esf_nleaves, *new_ilocal, nranks, ndranks, niranks, ndiranks, minleaf, maxleaf, maxlocal;
  char              *rootdata, *leafdata = NULL, *leafmem; /* Only stores 0 or 1, so we can save memory with char */
  PetscMPIInt       *esf_ranks;
  const PetscMPIInt *ranks, *iranks;
  const PetscInt    *roffset, *rmine, *rremote, *ioffset, *irootloc;
  PetscBool          connected;
  PetscSFNode       *new_iremote;
  PetscSF_Basic     *bas;

  PetscFunctionBegin;
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), &esf));
  PetscCall(PetscSFSetFromOptions(esf));
  PetscCall(PetscSFSetType(esf, PETSCSFBASIC)); /* This optimized routine can only create a basic sf */

  /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
  maxlocal = maxleaf - minleaf + 1;
  PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
  if (leafmem) leafdata = leafmem - minleaf;
  /* Tag selected roots */
  for (i = 0; i < nselected; ++i) rootdata[selected[i]] = 1;

  PetscCall(PetscSFBcastBegin(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE));
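  /* After the bcast, leafdata[] is nonzero exactly at the leaves that are still connected to a selected root */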
  PetscCall(PetscSFGetLeafInfo_Basic(sf, &nranks, &ndranks, &ranks, &roffset, &rmine, &rremote)); /* Get send info */
  esf_nranks = esf_ndranks = esf_nleaves = 0;
  for (i = 0; i < nranks; i++) {
    connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */
    for (j = roffset[i]; j < roffset[i + 1]; j++) {
      if (leafdata[rmine[j]]) {
        esf_nleaves++;
        connected = PETSC_TRUE;
      }
    }
    if (connected) {
      esf_nranks++;
      if (i < ndranks) esf_ndranks++;
    }
  }

  /* Set the graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
  PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
  PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
  PetscCall(PetscMalloc4(esf_nranks, &esf_ranks, esf_nranks + 1, &esf_roffset, esf_nleaves, &esf_rmine, esf_nleaves, &esf_rremote));
  p              = 0; /* Counter for connected root ranks */
  q              = 0; /* Counter for connected leaves */
  esf_roffset[0] = 0;
  for (i = 0; i < nranks; i++) { /* Scan leaf data again to fill esf arrays */
    connected = PETSC_FALSE;
    for (j = roffset[i]; j < roffset[i + 1]; j++) {
      if (leafdata[rmine[j]]) {
        esf_rmine[q] = new_ilocal[q] = rmine[j];
        esf_rremote[q]       = rremote[j];
        new_iremote[q].index = rremote[j];
        new_iremote[q].rank  = ranks[i];
        connected            = PETSC_TRUE;
        q++;
      }
    }
    if (connected) {
      esf_ranks[p]       = ranks[i];
      esf_roffset[p + 1] = q;
      p++;
    }
  }

  /* SetGraph internally resets the SF, so we only set its fields after the call */
  PetscCall(PetscSFSetGraph(esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
  esf->nranks    = esf_nranks;
  esf->ndranks   = esf_ndranks;
  esf->ranks     = esf_ranks;
  esf->roffset   = esf_roffset;
  esf->rmine     = esf_rmine;
  esf->rremote   = esf_rremote;
  esf->nleafreqs = esf_nranks - esf_ndranks;

  /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
  bas = (PetscSF_Basic *)esf->data;
  PetscCall(PetscSFGetRootInfo_Basic(sf, &niranks, &ndiranks, &iranks, &ioffset, &irootloc)); /* Get recv info */
  /* The embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here,
     but we do not care since these arrays are usually short. The benefit is that we can fill them by parsing irootloc just once.
  */
  PetscCall(PetscMalloc2(niranks, &bas->iranks, niranks + 1, &bas->ioffset));
  PetscCall(PetscMalloc1(ioffset[niranks], &bas->irootloc));
  bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
  p = 0; /* Counter for connected leaf ranks */
  q = 0; /* Counter for connected roots */
  for (i = 0; i < niranks; i++) {
    connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
    for (j = ioffset[i]; j < ioffset[i + 1]; j++) {
      if (rootdata[irootloc[j]]) {
        bas->irootloc[q++] = irootloc[j];
        connected          = PETSC_TRUE;
      }
    }
    if (connected) {
      bas->niranks++;
      if (i < ndiranks) bas->ndiranks++; /* Note that the order of ranks (including distinguished ranks) is kept */
      bas->iranks[p]      = iranks[i];
      bas->ioffset[p + 1] = q;
      p++;
    }
  }
  bas->itotal     = q;
  bas->nrootreqs  = bas->niranks - bas->ndiranks;
  esf->persistent = PETSC_TRUE;
  /* Set up packing related fields */
  PetscCall(PetscSFSetUpPackFields(esf));

  /* Copied from PetscSFSetUp(), since this routine skips PetscSFSetUp() */
#if defined(PETSC_HAVE_CUDA)
  if (esf->backend == PETSCSF_BACKEND_CUDA) {
    esf->ops->Malloc = PetscSFMalloc_CUDA;
    esf->ops->Free   = PetscSFFree_CUDA;
  }
#endif

#if defined(PETSC_HAVE_HIP)
  /* TODO: Needs debugging */
  if (esf->backend == PETSCSF_BACKEND_HIP) {
    esf->ops->Malloc = PetscSFMalloc_HIP;
    esf->ops->Free   = PetscSFFree_HIP;
  }
#endif

#if defined(PETSC_HAVE_KOKKOS)
  if (esf->backend == PETSCSF_BACKEND_KOKKOS) {
    esf->ops->Malloc = PetscSFMalloc_Kokkos;
    esf->ops->Free   = PetscSFFree_Kokkos;
  }
#endif
  esf->setupcalled = PETSC_TRUE; /* We have done the setup ourselves! */
  PetscCall(PetscFree2(rootdata, leafmem));
  *newsf = esf;
  PetscFunctionReturn(PETSC_SUCCESS);
}
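
/* Create the SFBASIC implementation: install the type-specific function table and allocate the implementation data */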
PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
{
  PetscSF_Basic *dat;

  PetscFunctionBegin;
  sf->ops->SetUp                = PetscSFSetUp_Basic;
  sf->ops->Reset                = PetscSFReset_Basic;
  sf->ops->Destroy              = PetscSFDestroy_Basic;
  sf->ops->View                 = PetscSFView_Basic;
  sf->ops->BcastBegin           = PetscSFBcastBegin_Basic;
  sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
  sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
  sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
  sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
  sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
  sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
  sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;

  PetscCall(PetscNew(&dat));
  sf->data = (void *)dat;
  PetscFunctionReturn(PETSC_SUCCESS);
}