Actual source code: sfbasic.c
#include <../src/vec/is/sf/impls/basic/sfbasic.h>
#include <../src/vec/is/sf/impls/basic/sfpack.h>
#include <petsc/private/viewerimpl.h>

// Init persistent MPI send/recv requests
static PetscErrorCode PetscSFLinkInitMPIRequests_Persistent_Basic(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
{
  PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
  PetscInt           i, j, cnt, nrootranks, ndrootranks, nleafranks, ndleafranks;
  const PetscInt    *rootoffset, *leafoffset;
  MPI_Aint           disp;
  MPI_Comm           comm = PetscObjectComm((PetscObject)sf);
  MPI_Datatype       unit = link->unit;
  const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi; /* Used to select buffers passed to MPI */
  const PetscInt     rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi;

  PetscFunctionBegin;
  if (bas->rootbuflen[PETSCSF_REMOTE] && !link->rootreqsinited[direction][rootmtype_mpi][rootdirect_mpi]) {
    PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, NULL, &rootoffset, NULL));
    if (direction == PETSCSF_LEAF2ROOT) {
      for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
        disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
        cnt  = rootoffset[i + 1] - rootoffset[i];
        PetscCallMPI(MPIU_Recv_init(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
      }
    } else { /* PETSCSF_ROOT2LEAF */
      for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
        disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
        cnt  = rootoffset[i + 1] - rootoffset[i];
        PetscCallMPI(MPIU_Send_init(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
      }
    }
    link->rootreqsinited[direction][rootmtype_mpi][rootdirect_mpi] = PETSC_TRUE;
  }

  if (sf->leafbuflen[PETSCSF_REMOTE] && !link->leafreqsinited[direction][leafmtype_mpi][leafdirect_mpi]) {
    PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, NULL, &leafoffset, NULL, NULL));
    if (direction == PETSCSF_LEAF2ROOT) {
      for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
        disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
        cnt  = leafoffset[i + 1] - leafoffset[i];
        PetscCallMPI(MPIU_Send_init(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
      }
    } else { /* PETSCSF_ROOT2LEAF */
      for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
        disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
        cnt  = leafoffset[i + 1] - leafoffset[i];
        PetscCallMPI(MPIU_Recv_init(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
      }
    }
    link->leafreqsinited[direction][leafmtype_mpi][leafdirect_mpi] = PETSC_TRUE;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
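
/*
  The routine above follows MPI's persistent point-to-point pattern: requests are created once with
  MPI_Send_init()/MPI_Recv_init() and re-started for every round of traffic. A minimal generic sketch
  (plain MPI, not PETSc-specific; buf, count, peer, tag and nrounds are placeholders):

    MPI_Request req;
    MPI_Send_init(buf, count, MPI_DOUBLE, peer, tag, comm, &req); // create once
    for (int it = 0; it < nrounds; it++) {
      MPI_Start(&req);                   // start one round of communication
      MPI_Wait(&req, MPI_STATUS_IGNORE); // complete it; the request stays allocated
    }
    MPI_Request_free(&req);              // free when the link is no longer needed
*/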

// Start MPI requests. If non-GPU-aware MPI is used, we might need to copy data from the device buffer to the host buffer first
static PetscErrorCode PetscSFLinkStartCommunication_Persistent_Basic(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
{
  PetscMPIInt    nsreqs = 0, nrreqs = 0;
  MPI_Request   *sreqs = NULL, *rreqs = NULL;
  PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;
  PetscInt       sbuflen, rbuflen;

  PetscFunctionBegin;
  rbuflen = (direction == PETSCSF_ROOT2LEAF) ? sf->leafbuflen[PETSCSF_REMOTE] : bas->rootbuflen[PETSCSF_REMOTE];
  if (rbuflen) {
    if (direction == PETSCSF_ROOT2LEAF) {
      nrreqs = sf->nleafreqs;
      PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, NULL, &rreqs));
    } else { /* leaf to root */
      nrreqs = bas->nrootreqs;
      PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, &rreqs, NULL));
    }
  }

  sbuflen = (direction == PETSCSF_ROOT2LEAF) ? bas->rootbuflen[PETSCSF_REMOTE] : sf->leafbuflen[PETSCSF_REMOTE];
  if (sbuflen) {
    if (direction == PETSCSF_ROOT2LEAF) {
      nsreqs = bas->nrootreqs;
      PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */));
      PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, &sreqs, NULL));
    } else { /* leaf to root */
      nsreqs = sf->nleafreqs;
      PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE));
      PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, NULL, &sreqs));
    }
  }
  PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link)); // need to sync the stream to make BOTH sendbuf and recvbuf ready
  if (rbuflen) PetscCallMPI(MPI_Startall_irecv(rbuflen, link->unit, nrreqs, rreqs));
  if (sbuflen) PetscCallMPI(MPI_Startall_isend(sbuflen, link->unit, nsreqs, sreqs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
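
/*
  When MPI is not GPU-aware, the staging performed by PetscSFLinkCopyRoot/LeafBufferInCaseNotUseGpuAwareMPI()
  and the stream sync above amount to the following generic pattern (a hedged sketch in CUDA runtime C;
  hostbuf, devbuf, nbytes and stream are placeholders, not PetscSFLink fields):

    cudaMemcpyAsync(hostbuf, devbuf, nbytes, cudaMemcpyDeviceToHost, stream); // stage device data on the host
    cudaStreamSynchronize(stream);  // make sure the copy has finished before MPI touches hostbuf
    MPI_Startall(nsreqs, sreqs);    // the persistent sends were initialized with hostbuf
*/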

#if defined(PETSC_HAVE_MPIX_STREAM)
// issue MPIX_Isend/Irecv_enqueue()
static PetscErrorCode PetscSFLinkStartCommunication_MPIX_Stream(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
{
  PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
  PetscInt           i, j, cnt, nrootranks, ndrootranks, nleafranks, ndleafranks;
  const PetscInt    *rootoffset, *leafoffset;
  MPI_Aint           disp;
  MPI_Comm           stream_comm = sf->stream_comm;
  MPI_Datatype       unit = link->unit;
  const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi; /* Used to select buffers passed to MPI */
  const PetscInt     rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi;

  PetscFunctionBegin;
  if (bas->rootbuflen[PETSCSF_REMOTE]) {
    PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, NULL, &rootoffset, NULL));
    if (direction == PETSCSF_LEAF2ROOT) {
      for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
        disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
        cnt  = rootoffset[i + 1] - rootoffset[i];
        PetscCallMPI(MPIX_Irecv_enqueue(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, stream_comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
      }
    } else { // PETSCSF_ROOT2LEAF
      for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
        disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
        cnt  = rootoffset[i + 1] - rootoffset[i];
        // no need to sync the gpu stream!
        PetscCallMPI(MPIX_Isend_enqueue(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, stream_comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
      }
    }
  }

  if (sf->leafbuflen[PETSCSF_REMOTE]) {
    PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, NULL, &leafoffset, NULL, NULL));
    if (direction == PETSCSF_LEAF2ROOT) {
      for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
        disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
        cnt  = leafoffset[i + 1] - leafoffset[i];
        // no need to sync the gpu stream!
        PetscCallMPI(MPIX_Isend_enqueue(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, stream_comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
      }
    } else { // PETSCSF_ROOT2LEAF
      for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
        disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
        cnt  = leafoffset[i + 1] - leafoffset[i];
        PetscCallMPI(MPIX_Irecv_enqueue(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, stream_comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscSFLinkFinishCommunication_MPIX_Stream(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
{
  PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
  const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi;
  const PetscInt     rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi;

  PetscFunctionBegin;
  PetscCallMPI(MPIX_Waitall_enqueue(bas->nrootreqs, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi], MPI_STATUSES_IGNORE));
  PetscCallMPI(MPIX_Waitall_enqueue(sf->nleafreqs, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi], MPI_STATUSES_IGNORE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
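
/*
  MPIX_Isend/Irecv_enqueue() and MPIX_Waitall_enqueue() are MPICH's stream-aware extensions: the operations
  are enqueued onto the GPU stream bound to stream_comm, so no host-side stream synchronization is needed.
  A hedged sketch of how such a communicator is typically built (MPICH-specific; the exact info keys may
  differ between MPICH versions, and cuda_stream is a placeholder cudaStream_t created elsewhere):

    MPI_Info    info;
    MPIX_Stream mpi_stream;
    MPI_Comm    stream_comm;
    MPI_Info_create(&info);
    MPI_Info_set(info, "type", "cudaStream_t");
    MPIX_Info_set_hex(info, "value", &cuda_stream, sizeof(cuda_stream));
    MPIX_Stream_create(info, &mpi_stream);
    MPIX_Stream_comm_create(PETSC_COMM_WORLD, mpi_stream, &stream_comm);
*/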

static PetscErrorCode PetscSFSetCommunicationOps_Basic(PetscSF sf, PetscSFLink link)
{
  PetscFunctionBegin;
  link->InitMPIRequests    = PetscSFLinkInitMPIRequests_Persistent_Basic;
  link->StartCommunication = PetscSFLinkStartCommunication_Persistent_Basic;
#if defined(PETSC_HAVE_MPIX_STREAM)
  const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi;
  if (sf->use_stream_aware_mpi && (PetscMemTypeDevice(rootmtype_mpi) || PetscMemTypeDevice(leafmtype_mpi))) {
    link->StartCommunication  = PetscSFLinkStartCommunication_MPIX_Stream;
    link->FinishCommunication = PetscSFLinkFinishCommunication_MPIX_Stream;
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*===================================================================================*/
/*                        SF public interface implementations                        */
/*===================================================================================*/
PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
{
  PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;
  PetscInt      *rlengths, *ilengths, i, nRemoteRootRanks, nRemoteLeafRanks;
  PetscMPIInt    rank, niranks, *iranks, tag;
  MPI_Comm       comm;
  MPI_Group      group;
  MPI_Request   *rootreqs, *leafreqs;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_group(PETSC_COMM_SELF, &group));
  PetscCall(PetscSFSetUpRanks(sf, group));
  PetscCallMPI(MPI_Group_free(&group));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscObjectGetNewTag((PetscObject)sf, &tag));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /*
   * Inform roots about how many leaves they have and from which ranks
   */
  PetscCall(PetscMalloc1(sf->nranks, &rlengths));
  /* Determine the number of incoming ranks, which ranks they are, and the lengths of the incoming data */
  for (i = 0; i < sf->nranks; i++) { rlengths[i] = sf->roffset[i + 1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */ }
  nRemoteRootRanks = sf->nranks - sf->ndranks;
  PetscCall(PetscCommBuildTwoSided(comm, 1, MPIU_INT, nRemoteRootRanks, PetscSafePointerPlusOffset(sf->ranks, sf->ndranks), PetscSafePointerPlusOffset(rlengths, sf->ndranks), &niranks, &iranks, (void **)&ilengths));

  /* Sort iranks. See the use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() for why.
     We could sort the ranks there instead, at the price of allocating extra work arrays. Presumably, niranks is
     small and the sorting is cheap.
  */
  PetscCall(PetscSortMPIIntWithIntArray(niranks, iranks, ilengths));

  /* Partition into distinguished and non-distinguished incoming ranks */
  bas->ndiranks = sf->ndranks;
  bas->niranks  = bas->ndiranks + niranks;
  PetscCall(PetscMalloc2(bas->niranks, &bas->iranks, bas->niranks + 1, &bas->ioffset));
  bas->ioffset[0] = 0;
  for (i = 0; i < bas->ndiranks; i++) {
    bas->iranks[i]      = sf->ranks[i];
    bas->ioffset[i + 1] = bas->ioffset[i] + rlengths[i];
  }
  PetscCheck(bas->ndiranks <= 1 && (bas->ndiranks != 1 || bas->iranks[0] == rank), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Broken setup for shared ranks");
  for (; i < bas->niranks; i++) {
    bas->iranks[i]      = iranks[i - bas->ndiranks];
    bas->ioffset[i + 1] = bas->ioffset[i] + ilengths[i - bas->ndiranks];
  }
  bas->itotal = bas->ioffset[i];
  PetscCall(PetscFree(rlengths));
  PetscCall(PetscFree(iranks));
  PetscCall(PetscFree(ilengths));

  /* Send leaf identities to roots */
  nRemoteLeafRanks = bas->niranks - bas->ndiranks;
  PetscCall(PetscMalloc1(bas->itotal, &bas->irootloc));
  PetscCall(PetscMalloc2(nRemoteLeafRanks, &rootreqs, nRemoteRootRanks, &leafreqs));
  for (i = bas->ndiranks; i < bas->niranks; i++) PetscCallMPI(MPIU_Irecv(bas->irootloc + bas->ioffset[i], bas->ioffset[i + 1] - bas->ioffset[i], MPIU_INT, bas->iranks[i], tag, comm, &rootreqs[i - bas->ndiranks]));
  for (i = 0; i < sf->nranks; i++) {
    PetscInt npoints = sf->roffset[i + 1] - sf->roffset[i];
    if (i < sf->ndranks) {
      PetscCheck(sf->ranks[i] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished leaf rank");
      PetscCheck(bas->iranks[0] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished root rank");
      PetscCheck(npoints == bas->ioffset[1] - bas->ioffset[0], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Distinguished rank exchange has mismatched lengths");
      PetscCall(PetscArraycpy(bas->irootloc + bas->ioffset[0], sf->rremote + sf->roffset[i], npoints));
      continue;
    }
    PetscCallMPI(MPIU_Isend(sf->rremote + sf->roffset[i], npoints, MPIU_INT, sf->ranks[i], tag, comm, &leafreqs[i - sf->ndranks]));
  }
  PetscCallMPI(MPI_Waitall(nRemoteLeafRanks, rootreqs, MPI_STATUSES_IGNORE));
  PetscCallMPI(MPI_Waitall(nRemoteRootRanks, leafreqs, MPI_STATUSES_IGNORE));

  sf->nleafreqs  = nRemoteRootRanks;
  bas->nrootreqs = nRemoteLeafRanks;

  /* Set up fields related to packing, such as rootbuflen[] */
  PetscCall(PetscSFSetUpPackFields(sf));
  PetscCall(PetscFree2(rootreqs, leafreqs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
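
/*
  The key step above is discovering the incoming ranks from the known outgoing ranks with
  PetscCommBuildTwoSided(). A hedged, stand-alone sketch of that call (nto, toranks and tolengths are
  placeholders naming the ranks this process sends to and one PetscInt of metadata per rank):

    PetscMPIInt  nfrom, *fromranks;
    PetscInt    *fromlengths;
    PetscCall(PetscCommBuildTwoSided(comm, 1, MPIU_INT, nto, toranks, tolengths, &nfrom, &fromranks, (void **)&fromlengths));
    // fromranks[]/fromlengths[] now list the ranks that send to this process and their message lengths;
    // both arrays are allocated by the call and are released with PetscFree(), as done above
*/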

PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
{
  PetscSF_Basic *bas  = (PetscSF_Basic *)sf->data;
  PetscSFLink    link = bas->avail, next;

  PetscFunctionBegin;
  PetscCheck(!bas->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Outstanding operation has not been completed");
  PetscCall(PetscFree2(bas->iranks, bas->ioffset));
  PetscCall(PetscFree(bas->irootloc));

#if defined(PETSC_HAVE_DEVICE)
  for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, bas->irootloc_d[i]));
#endif

#if defined(PETSC_HAVE_NVSHMEM)
  PetscCall(PetscSFReset_Basic_NVSHMEM(sf));
#endif

  for (; link; link = next) {
    next = link->next;
    PetscCall(PetscSFLinkDestroy(sf, link));
  }
  bas->avail = NULL;
  PetscCall(PetscSFResetPackFields(sf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
{
  PetscFunctionBegin;
  PetscCall(PetscSFReset_Basic(sf));
  PetscCall(PetscFree(sf->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_USE_SINGLE_LIBRARY)
  #include <petscmat.h>

PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf, PetscViewer viewer)
{
  PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
  PetscInt           i, nrootranks, ndrootranks;
  const PetscInt    *rootoffset;
  PetscMPIInt        rank, size;
  const PetscMPIInt *rootranks;
  MPI_Comm           comm = PetscObjectComm((PetscObject)sf);
  PetscScalar        unitbytes;
  Mat                A;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* PetscSFView is most useful for the SF used in VecScatterBegin/End in MatMult etc, where we do
     PetscSFBcast, i.e., roots send data to leaves. We dump the communication pattern into a matrix
     from the senders' point of view: how many bytes I will send to my neighbors.

     Looking at a column of the matrix, one can also see how many bytes a rank will receive from others.

     If the PetscSFLink bas->inuse were available, we could use it to get the tree vertex size. But that
     would give different interpretations of the same SF for different data types. Since we care most
     about VecScatter, we uniformly treat each vertex as a PetscScalar.
  */
  unitbytes = (PetscScalar)sizeof(PetscScalar);

  PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, &rootranks, &rootoffset, NULL));
  PetscCall(MatCreateAIJ(comm, 1, 1, size, size, 1, NULL, nrootranks - ndrootranks, NULL, &A));
  PetscCall(MatSetOptionsPrefix(A, "__petsc_internal__")); /* To prevent the internal A from taking any command line options */
  for (i = 0; i < nrootranks; i++) PetscCall(MatSetValue(A, (PetscInt)rank, bas->iranks[i], (rootoffset[i + 1] - rootoffset[i]) * unitbytes, INSERT_VALUES));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatView(A, viewer));
  PetscCall(MatDestroy(&A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf, PetscViewer viewer)
{
  PetscBool isascii;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, " MultiSF sort=%s\n", sf->rankorder ? "rank-order" : "unordered"));
#if defined(PETSC_USE_SINGLE_LIBRARY)
  else {
    PetscBool isdraw, isbinary;
    PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
    PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
    if ((isascii && viewer->format == PETSC_VIEWER_ASCII_MATLAB) || isdraw || isbinary) PetscCall(PetscSFView_Basic_PatternAndSizes(sf, viewer));
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
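
/*
  Example (illustrative caller-side sketch, not part of this file): viewing an SF programmatically; with
  PETSC_USE_SINGLE_LIBRARY, a draw, binary, or ASCII-MATLAB viewer triggers the pattern-and-sizes dump above.

    PetscCall(PetscSFView(sf, PETSC_VIEWER_STDOUT_WORLD));
*/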

PETSC_INTERN PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  /* Create a communication link, which provides buffers, MPI requests etc (if MPI is used) */
  PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link));
  /* Pack rootdata into rootbuf for remote communication */
  PetscCall(PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata));
  /* Start communication, e.g., post MPI_Isend */
  PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF));
  /* Do the local scatter (i.e., self-to-self communication), which overlaps with the remote communication above */
  PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_ROOT2LEAF, (void *)rootdata, leafdata, op));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  /* Retrieve the link used in XxxBegin() with root/leafdata as the key */
  PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
  /* Finish remote communication, e.g., post MPI_Waitall */
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF));
  /* Unpack the remotely received data in leafbuf into leafdata */
  PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafdata, op));
  /* Recycle the link */
  PetscCall(PetscSFLinkReclaim(sf, &link));
  PetscFunctionReturn(PETSC_SUCCESS);
}
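
/*
  Example (illustrative caller-side sketch): a typical broadcast from roots to leaves through the public
  API; sf, rootdata and leafdata are placeholders assumed to be set up elsewhere.

    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
    // ... independent local work can overlap with the communication here ...
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
*/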

/* Shared by ReduceBegin and FetchAndOpBegin */
static inline PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op, PetscSFOperation sfop, PetscSFLink *out)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, sfop, &link));
  PetscCall(PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata));
  PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_LEAF2ROOT));
  *out = link;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* leaf -> root with reduction */
PETSC_INTERN PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_REDUCE, &link));
  PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_LEAF2ROOT, rootdata, (void *)leafdata, op));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT));
  PetscCall(PetscSFLinkUnpackRootData(sf, link, PETSCSF_REMOTE, rootdata, op));
  PetscCall(PetscSFLinkReclaim(sf, &link));
  PetscFunctionReturn(PETSC_SUCCESS);
}
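
/*
  Example (illustrative caller-side sketch): reducing leaf values onto roots, e.g. summing integer
  contributions; sf, leafdata and rootdata are placeholders.

    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, leafdata, rootdata, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, leafdata, rootdata, MPI_SUM));
*/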

PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_FETCH, &link));
  PetscCall(PetscSFLinkFetchAndOpLocal(sf, link, rootdata, leafdata, leafupdate, op));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
  /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT));
  /* Do fetch-and-op; the (remote) update results are in rootbuf */
  PetscCall(PetscSFLinkFetchAndOpRemote(sf, link, rootdata, op));
  /* Bcast rootbuf to leafupdate */
  PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF));
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF));
  /* Unpack and insert the fetched data into leaves */
  PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafupdate, MPI_REPLACE));
  PetscCall(PetscSFLinkReclaim(sf, &link));
  PetscFunctionReturn(PETSC_SUCCESS);
}
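
/*
  Example (illustrative caller-side sketch): atomically add leaf values into roots while fetching the
  pre-update root values back into leafupdate; sf and the data arrays are placeholders.

    PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM));
    PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM));
*/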

PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
{
  PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;

  PetscFunctionBegin;
  if (niranks) *niranks = bas->niranks;
  if (iranks) *iranks = bas->iranks;
  if (ioffset) *ioffset = bas->ioffset;
  if (irootloc) *irootloc = bas->irootloc;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* An optimized PetscSFCreateEmbeddedRootSF. We aggressively reuse the communication information already
   established on sf: one Bcast on sf suffices, and no further communication is needed to build the
   embedded SF. Note that selected[] was sorted before this routine was called.
 */
PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
{
  PetscSF            esf;
  PetscInt           esf_nranks, esf_ndranks, *esf_roffset, *esf_rmine, *esf_rremote;
  PetscInt           i, j, p, q, nroots, esf_nleaves, *new_ilocal, nranks, ndranks, niranks, ndiranks, minleaf, maxleaf, maxlocal;
  char              *rootdata, *leafdata, *leafmem; /* Only stores 0 or 1, so we can save memory with char */
  PetscMPIInt       *esf_ranks;
  const PetscMPIInt *ranks, *iranks;
  const PetscInt    *roffset, *rmine, *rremote, *ioffset, *irootloc;
  PetscBool          connected;
  PetscSFNode       *new_iremote;
  PetscSF_Basic     *bas;

  PetscFunctionBegin;
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), &esf));
  PetscCall(PetscSFSetFromOptions(esf));
  PetscCall(PetscSFSetType(esf, PETSCSFBASIC)); /* This optimized routine can only create a basic sf */

  /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
  maxlocal = maxleaf - minleaf + 1;
  PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
  leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
  /* Tag selected roots */
  for (i = 0; i < nselected; ++i) rootdata[selected[i]] = 1;

  PetscCall(PetscSFBcastBegin(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFGetLeafInfo_Basic(sf, &nranks, &ndranks, &ranks, &roffset, &rmine, &rremote)); /* Get send info */
  esf_nranks = esf_ndranks = esf_nleaves = 0;
  for (i = 0; i < nranks; i++) {
    connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */
    for (j = roffset[i]; j < roffset[i + 1]; j++) {
      if (leafdata[rmine[j]]) {
        esf_nleaves++;
        connected = PETSC_TRUE;
      }
    }
    if (connected) {
      esf_nranks++;
      if (i < ndranks) esf_ndranks++;
    }
  }

  /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
  PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
  PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
  PetscCall(PetscMalloc4(esf_nranks, &esf_ranks, esf_nranks + 1, &esf_roffset, esf_nleaves, &esf_rmine, esf_nleaves, &esf_rremote));
  p = 0; /* Counter for connected root ranks */
  q = 0; /* Counter for connected leaves */
  esf_roffset[0] = 0;
  for (i = 0; i < nranks; i++) { /* Scan leaf data again to fill esf arrays */
    connected = PETSC_FALSE;
    for (j = roffset[i]; j < roffset[i + 1]; j++) {
      if (leafdata[rmine[j]]) {
        esf_rmine[q] = new_ilocal[q] = rmine[j];
        esf_rremote[q]       = rremote[j];
        new_iremote[q].index = rremote[j];
        new_iremote[q].rank  = ranks[i];
        connected            = PETSC_TRUE;
        q++;
      }
    }
    if (connected) {
      esf_ranks[p]       = ranks[i];
      esf_roffset[p + 1] = q;
      p++;
    }
  }

  /* SetGraph internally resets the SF, so we only set its fields after the call */
  PetscCall(PetscSFSetGraph(esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
  esf->nranks    = esf_nranks;
  esf->ndranks   = esf_ndranks;
  esf->ranks     = esf_ranks;
  esf->roffset   = esf_roffset;
  esf->rmine     = esf_rmine;
  esf->rremote   = esf_rremote;
  esf->nleafreqs = esf_nranks - esf_ndranks;

  /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
  bas = (PetscSF_Basic *)esf->data;
  PetscCall(PetscSFGetRootInfo_Basic(sf, &niranks, &ndiranks, &iranks, &ioffset, &irootloc)); /* Get recv info */
  /* The embedded SF always has simpler communication than the original one. We might allocate longer arrays
     than needed here, but we do not care since these arrays are usually short. The benefit is that we can
     fill them by parsing irootloc just once.
  */
  PetscCall(PetscMalloc2(niranks, &bas->iranks, niranks + 1, &bas->ioffset));
  PetscCall(PetscMalloc1(ioffset[niranks], &bas->irootloc));
  bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
  p = 0; /* Counter for connected leaf ranks */
  q = 0; /* Counter for connected roots */
  for (i = 0; i < niranks; i++) {
    connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
    for (j = ioffset[i]; j < ioffset[i + 1]; j++) {
      if (rootdata[irootloc[j]]) {
        bas->irootloc[q++] = irootloc[j];
        connected          = PETSC_TRUE;
      }
    }
    if (connected) {
      bas->niranks++;
      if (i < ndiranks) bas->ndiranks++; /* Note that the order of ranks (including distinguished ranks) is kept */
      bas->iranks[p]      = iranks[i];
      bas->ioffset[p + 1] = q;
      p++;
    }
  }
  bas->itotal     = q;
  bas->nrootreqs  = bas->niranks - bas->ndiranks;
  esf->persistent = PETSC_TRUE;
  /* Set up packing-related fields */
  PetscCall(PetscSFSetUpPackFields(esf));

  /* Copied from PetscSFSetUp(), since this routine intentionally skips PetscSFSetUp(). */
#if defined(PETSC_HAVE_CUDA)
  if (esf->backend == PETSCSF_BACKEND_CUDA) {
    esf->ops->Malloc = PetscSFMalloc_CUDA;
    esf->ops->Free   = PetscSFFree_CUDA;
  }
#endif

#if defined(PETSC_HAVE_HIP)
  /* TODO: Needs debugging */
  if (esf->backend == PETSCSF_BACKEND_HIP) {
    esf->ops->Malloc = PetscSFMalloc_HIP;
    esf->ops->Free   = PetscSFFree_HIP;
  }
#endif

#if defined(PETSC_HAVE_KOKKOS)
  if (esf->backend == PETSCSF_BACKEND_KOKKOS) {
    esf->ops->Malloc = PetscSFMalloc_Kokkos;
    esf->ops->Free   = PetscSFFree_Kokkos;
  }
#endif
  esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
  PetscCall(PetscFree2(rootdata, leafmem));
  *newsf = esf;
  PetscFunctionReturn(PETSC_SUCCESS);
}
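
/*
  Example (illustrative caller-side sketch): extracting the sub-SF whose roots are restricted to a sorted
  subset of the original roots; sf, nselected and selected[] are placeholders.

    PetscSF esf;
    PetscCall(PetscSFCreateEmbeddedRootSF(sf, nselected, selected, &esf));
    // ... use esf like any other SF ...
    PetscCall(PetscSFDestroy(&esf));
*/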

PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
{
  PetscSF_Basic *dat;

  PetscFunctionBegin;
  sf->ops->SetUp                = PetscSFSetUp_Basic;
  sf->ops->Reset                = PetscSFReset_Basic;
  sf->ops->Destroy              = PetscSFDestroy_Basic;
  sf->ops->View                 = PetscSFView_Basic;
  sf->ops->BcastBegin           = PetscSFBcastBegin_Basic;
  sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
  sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
  sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
  sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
  sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
  sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
  sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
  sf->ops->SetCommunicationOps  = PetscSFSetCommunicationOps_Basic;

  sf->persistent = PETSC_TRUE; // currently SFBASIC always uses persistent send/recv
  sf->collective = PETSC_FALSE;

  PetscCall(PetscNew(&dat));
  sf->data = (void *)dat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
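
/*
  Example (illustrative caller-side sketch): creating a star forest that uses this basic implementation;
  comm, nroots, nleaves, ilocal and iremote are placeholders describing the graph.

    PetscSF sf;
    PetscCall(PetscSFCreate(comm, &sf));
    PetscCall(PetscSFSetType(sf, PETSCSFBASIC));
    PetscCall(PetscSFSetGraph(sf, nroots, nleaves, ilocal, PETSC_COPY_VALUES, iremote, PETSC_COPY_VALUES));
    PetscCall(PetscSFSetUp(sf)); // invokes PetscSFSetUp_Basic() above
    PetscCall(PetscSFDestroy(&sf));
*/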