Actual source code: sfalltoall.c
#include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h>
#include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h>
#include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h>

/* Reuse the Allgatherv type. The difference is that some fields (e.g., displs, recvcounts) are not used, which is not a big deal */
typedef PetscSF_Allgatherv PetscSF_Alltoall;
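
/* In an SFALLTOALL star forest every rank has as many roots and leaves as there are ranks in the
   communicator, and leaf i on rank j is connected to root j on rank i. This is exactly the
   communication pattern of MPI_Alltoall() with sendcount = recvcount = 1 (roots being the send
   buffer and leaves the receive buffer in a Bcast), which is what the implementations below rely on. */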

/*===================================================================================*/
/* Implementations of SF public APIs */
/*===================================================================================*/
static PetscErrorCode PetscSFGetGraph_Alltoall(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
{
  PetscInt    i;
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
  if (nroots) *nroots = sf->nroots;
  if (nleaves) *nleaves = sf->nleaves;
  if (ilocal) *ilocal = NULL; /* Contiguous local indices */
  if (iremote) {
    if (!sf->remote) {
      PetscCall(PetscMalloc1(sf->nleaves, &sf->remote));
      sf->remote_alloc = sf->remote;
      for (i = 0; i < sf->nleaves; i++) {
        sf->remote[i].rank  = i;
        sf->remote[i].index = rank; /* leaf i is connected to root <rank> on rank i */
      }
    }
    *iremote = sf->remote;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
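
/* Bcast moves root data to leaves. Since every pair of ranks exchanges exactly one unit, the whole
   remote communication is a single MPIU_Ialltoall() on the packed root buffer; the matching wait
   and unpack are handled by PetscSFBcastEnd_Basic(), which is installed as BcastEnd below. */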

static PetscErrorCode PetscSFBcastBegin_Alltoall(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
{
  PetscSFLink  link;
  MPI_Comm     comm;
  void        *rootbuf = NULL, *leafbuf = NULL; /* buffer used by MPI */
  MPI_Request *req;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link));
  PetscCall(PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata));
  PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_ROOT2LEAF, &rootbuf, &leafbuf, &req, NULL));
  PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_ROOT2LEAF));
  PetscCallMPI(MPIU_Ialltoall(rootbuf, 1, unit, leafbuf, 1, unit, comm, req));
  PetscFunctionReturn(PETSC_SUCCESS);
}
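
/* Reduce moves leaf data to roots. In the alltoall graph each root is connected to exactly one
   leaf, so there is nothing to combine on the root side and the transpose communication is again
   a single MPIU_Ialltoall(), just with the leaf buffer as the send buffer. The MPI_Op against
   existing root values is applied when unpacking in PetscSFReduceEnd_Basic(). */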

static PetscErrorCode PetscSFReduceBegin_Alltoall(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
{
  PetscSFLink  link;
  MPI_Comm     comm;
  void        *rootbuf = NULL, *leafbuf = NULL; /* buffer used by MPI */
  MPI_Request *req;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_REDUCE, &link));
  PetscCall(PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata));
  PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_LEAF2ROOT, &rootbuf, &leafbuf, &req, NULL));
  PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_LEAF2ROOT));
  PetscCallMPI(MPIU_Ialltoall(leafbuf, 1, unit, rootbuf, 1, unit, comm, req));
  PetscFunctionReturn(PETSC_SUCCESS);
}
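
/* The local SF keeps only the edges whose root and leaf both live on this rank. For alltoall that
   is the single edge connecting root <rank> to leaf <rank>, expressed on PETSC_COMM_SELF without
   remapping the indices. */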

static PetscErrorCode PetscSFCreateLocalSF_Alltoall(PetscSF sf, PetscSF *out)
{
  PetscInt     nroots = 1, nleaves = 1, *ilocal;
  PetscSFNode *iremote = NULL;
  PetscSF      lsf;
  PetscMPIInt  rank;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
  PetscCall(PetscMalloc1(nleaves, &ilocal));
  PetscCall(PetscMalloc1(nleaves, &iremote));
  ilocal[0]        = rank;
  iremote[0].rank  = 0;    /* rank in PETSC_COMM_SELF */
  iremote[0].index = rank; /* LocalSF is an embedded SF. Indices are not remapped */

  PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
  PetscCall(PetscSFSetGraph(lsf, nroots, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetUp(lsf));
  *out = lsf;
  PetscFunctionReturn(PETSC_SUCCESS);
}
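
/* PetscSFCreateEmbeddedRootSF() drops all edges that are not connected to the selected roots. For
   alltoall this is cheap: a selected root j on this rank reaches exactly one leaf, on rank j, so a
   sparse two-sided exchange of the selected indices tells every rank which of its leaves survive.
   The result is built as a PETSCSFBASIC and its internal fields are filled directly, so the more
   expensive PetscSFSetUp() is skipped. */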

static PetscErrorCode PetscSFCreateEmbeddedRootSF_Alltoall(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
{
  PetscInt       i, *tmproots, *ilocal, ndranks, ndiranks;
  PetscSFNode   *iremote;
  PetscMPIInt    nroots, *roots, nleaves, *leaves, rank;
  MPI_Comm       comm;
  PetscSF_Basic *bas;
  PetscSF        esf;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* Sort selected[], remove duplicates, and store the result in roots[] */
  PetscCall(PetscMalloc1(nselected, &tmproots));
  PetscCall(PetscArraycpy(tmproots, selected, nselected));
  PetscCall(PetscSortRemoveDupsInt(&nselected, tmproots)); /* nselected might be changed */
  PetscCheck(tmproots[0] >= 0 && tmproots[nselected - 1] < sf->nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max root indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", tmproots[0], tmproots[nselected - 1], sf->nroots);
  nroots = nselected; /* For Alltoall, we know root indices will not overflow MPI_INT */
  PetscCall(PetscMalloc1(nselected, &roots));
  for (i = 0; i < nselected; i++) roots[i] = tmproots[i];
  PetscCall(PetscFree(tmproots));

  /* Find out which leaves are still connected to roots in the embedded sf. We expect PetscCommBuildTwoSided() to be more scalable than MPI_Alltoall() */
  PetscCall(PetscCommBuildTwoSided(comm, 0 /*empty msg*/, MPI_INT /*fake*/, nroots, roots, NULL /*todata*/, &nleaves, &leaves, NULL /*fromdata*/));
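
  /* leaves[] now holds the ranks whose selected roots reach one of our leaves: rank q appears in
     leaves[] exactly when q selected its root with index <rank>, i.e., the root connected to our
     leaf q. */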

  /* Move myself ahead if rank is in leaves[], since I am a distinguished rank */
  ndranks = 0;
  for (i = 0; i < nleaves; i++) {
    if (leaves[i] == rank) {
      leaves[i] = -rank;
      ndranks   = 1;
      break;
    }
  }
  PetscCall(PetscSortMPIInt(nleaves, leaves));
  if (nleaves && leaves[0] < 0) leaves[0] = rank;

  /* Build esf and fill its fields manually (without calling PetscSFSetUp) */
  PetscCall(PetscMalloc1(nleaves, &ilocal));
  PetscCall(PetscMalloc1(nleaves, &iremote));
  for (i = 0; i < nleaves; i++) { /* 1:1 map from roots to leaves */
    ilocal[i]        = leaves[i]; /* leaf leaves[i] is connected to the selected root on rank leaves[i] */
    iremote[i].rank  = leaves[i];
    iremote[i].index = rank; /* that root has index <rank> in the remote root space */
  }
  PetscCall(PetscSFCreate(comm, &esf));
  PetscCall(PetscSFSetType(esf, PETSCSFBASIC)); /* This optimized routine can only create a basic sf */
  PetscCall(PetscSFSetGraph(esf, sf->nroots, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));

  /* As if we are calling PetscSFSetUpRanks(esf, self's group) */
  PetscCall(PetscMalloc4(nleaves, &esf->ranks, nleaves + 1, &esf->roffset, nleaves, &esf->rmine, nleaves, &esf->rremote));
  esf->nranks     = nleaves;
  esf->ndranks    = ndranks;
  esf->roffset[0] = 0;
  for (i = 0; i < nleaves; i++) {
    esf->ranks[i]       = leaves[i];
    esf->roffset[i + 1] = i + 1;
    esf->rmine[i]       = leaves[i]; /* local leaf indices */
    esf->rremote[i]     = rank;      /* root indices on the remote rank, matching iremote[] above */
  }
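
  /* Each remote rank contributes exactly one leaf here (and, below, exactly one root), so the
     offset arrays are simply 0,1,2,... and the per-rank index arrays have one entry per rank. */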

  /* Set up esf->data, the incoming communication (i.e., recv info), which is usually done by PetscSFSetUp_Basic */
  bas = (PetscSF_Basic *)esf->data;
  PetscCall(PetscMalloc2(nroots, &bas->iranks, nroots + 1, &bas->ioffset));
  PetscCall(PetscMalloc1(nroots, &bas->irootloc));

  /* Move myself ahead if rank is in roots[], since I am a distinguished irank */
  ndiranks = 0;
  for (i = 0; i < nroots; i++) {
    if (roots[i] == rank) {
      roots[i] = -rank;
      ndiranks = 1;
      break;
    }
  }
  PetscCall(PetscSortMPIInt(nroots, roots));
  if (nroots && roots[0] < 0) roots[0] = rank;

  bas->niranks    = nroots;
  bas->ndiranks   = ndiranks;
  bas->ioffset[0] = 0;
  bas->itotal     = nroots;
  for (i = 0; i < nroots; i++) {
    bas->iranks[i]      = roots[i];
    bas->ioffset[i + 1] = i + 1;
    bas->irootloc[i]    = roots[i];
  }

  /* roots[] and leaves[] have been copied into esf's fields; free them to avoid leaking */
  PetscCall(PetscFree(roots));
  PetscCall(PetscFree(leaves));

  /* See PetscSFCreateEmbeddedRootSF_Basic */
  esf->nleafreqs  = esf->nranks - esf->ndranks;
  bas->nrootreqs  = bas->niranks - bas->ndiranks;
  esf->persistent = PETSC_TRUE;
  /* Setup packing related fields */
  PetscCall(PetscSFSetUpPackFields(esf));

  esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
  *newsf           = esf;
  PetscFunctionReturn(PETSC_SUCCESS);
}
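
/* The constructor only fills the ops table: GetGraph, BcastBegin, ReduceBegin, CreateLocalSF and
   CreateEmbeddedRootSF are alltoall-specific, while everything else is borrowed from the Basic,
   Allgatherv, Allgather and Gatherv implementations that happen to apply verbatim. */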

PETSC_INTERN PetscErrorCode PetscSFCreate_Alltoall(PetscSF sf)
{
  PetscSF_Alltoall *dat = (PetscSF_Alltoall *)sf->data;

  PetscFunctionBegin;
  sf->ops->BcastEnd  = PetscSFBcastEnd_Basic;
  sf->ops->ReduceEnd = PetscSFReduceEnd_Basic;

  /* Inherit from Allgatherv. It is astonishing Alltoall can inherit so much from Allgather(v) */
  sf->ops->Destroy       = PetscSFDestroy_Allgatherv;
  sf->ops->Reset         = PetscSFReset_Allgatherv;
  sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv;
  sf->ops->GetRootRanks  = PetscSFGetRootRanks_Allgatherv;
  sf->ops->GetLeafRanks  = PetscSFGetLeafRanks_Allgatherv;

  /* Inherit from Allgather. Every process gathers equal-sized data from others, which enables this inheritance. */
  sf->ops->SetUp = PetscSFSetUp_Allgather;

  /* Inherit from Gatherv. Each root has only one leaf connected, which enables this inheritance */
  sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv;

  /* Alltoall stuff */
  sf->ops->GetGraph             = PetscSFGetGraph_Alltoall;
  sf->ops->BcastBegin           = PetscSFBcastBegin_Alltoall;
  sf->ops->ReduceBegin          = PetscSFReduceBegin_Alltoall;
  sf->ops->CreateLocalSF        = PetscSFCreateLocalSF_Alltoall;
  sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Alltoall;

  PetscCall(PetscNew(&dat));
  sf->data = (void *)dat;
  PetscFunctionReturn(PETSC_SUCCESS);
}