Actual source code: sfneighbor.c
#include <../src/vec/is/sf/impls/basic/sfpack.h>
#include <../src/vec/is/sf/impls/basic/sfbasic.h>

/* Convenience local types */
#if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES)
typedef MPI_Count PetscSFCount;
typedef MPI_Aint  PetscSFAint;
#else
typedef PetscMPIInt PetscSFCount;
typedef PetscMPIInt PetscSFAint;
#endif
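/* Note: with MPI large-count support and 64-bit PetscInt, counts and displacements are carried as
   MPI_Count/MPI_Aint so that messages with more than 2^31-1 elements can be described; otherwise
   they are plain MPI ints. */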
typedef struct {
  SFBASICHEADER;
  MPI_Comm     comms[2];                 /* Communicators with distributed topology in both directions */
  PetscBool    initialized[2];           /* Are the two communicators initialized? */
  PetscSFCount *rootcounts,*leafcounts;  /* counts for non-distinguished ranks */
  PetscSFAint  *rootdispls,*leafdispls;  /* displs for non-distinguished ranks */
  PetscMPIInt  *rootweights,*leafweights;
  PetscInt     rootdegree,leafdegree;
} PetscSF_Neighbor;
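/* SFNeighbor inherits the Basic layout via SFBASICHEADER and adds, per direction (root-to-leaf and
   leaf-to-root), a cached MPI communicator with distributed graph topology plus the count/displacement
   arrays consumed by the neighborhood alltoallv calls below. */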
/*===================================================================================*/
/* Internal utility routines */
/*===================================================================================*/
static inline PetscErrorCode PetscLogMPIMessages(PetscInt nsend,PetscSFCount *sendcnts,MPI_Datatype sendtype,PetscInt nrecv,PetscSFCount *recvcnts,MPI_Datatype recvtype)
{
#if defined(PETSC_USE_LOG)
  petsc_isend_ct += (PetscLogDouble)nsend;
  petsc_irecv_ct += (PetscLogDouble)nrecv;

  if (sendtype != MPI_DATATYPE_NULL) {
    PetscMPIInt i,typesize;
    MPI_Type_size(sendtype,&typesize);
    for (i=0; i<nsend; i++) petsc_isend_len += (PetscLogDouble)(sendcnts[i]*typesize);
  }

  if (recvtype != MPI_DATATYPE_NULL) {
    PetscMPIInt i,typesize;
    MPI_Type_size(recvtype,&typesize);
    for (i=0; i<nrecv; i++) petsc_irecv_len += (PetscLogDouble)(recvcnts[i]*typesize);
  }
#endif
  return 0;
}
/* Get the communicator with distributed graph topology, which is not cheap to build so we do it on demand (instead of at PetscSFSetUp time) */
static PetscErrorCode PetscSFGetDistComm_Neighbor(PetscSF sf,PetscSFDirection direction,MPI_Comm *distcomm)
{
  PetscSF_Neighbor  *dat = (PetscSF_Neighbor*)sf->data;
  PetscInt          nrootranks,ndrootranks,nleafranks,ndleafranks;
  const PetscMPIInt *rootranks,*leafranks;
  MPI_Comm          comm;

  PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,NULL,NULL);      /* Which ranks will access my roots (I am a destination) */
  PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,NULL,NULL,NULL); /* My leaves will access whose roots (I am a source) */

  if (!dat->initialized[direction]) {
    const PetscMPIInt indegree  = nrootranks-ndrootranks,*sources      = rootranks+ndrootranks;
    const PetscMPIInt outdegree = nleafranks-ndleafranks,*destinations = leafranks+ndleafranks;
    MPI_Comm          *mycomm   = &dat->comms[direction];
    PetscObjectGetComm((PetscObject)sf,&comm);
    if (direction == PETSCSF_LEAF2ROOT) {
      MPI_Dist_graph_create_adjacent(comm,indegree,sources,dat->rootweights,outdegree,destinations,dat->leafweights,MPI_INFO_NULL,1/*reorder*/,mycomm);
    } else { /* PETSCSF_ROOT2LEAF, reverse src & dest */
      MPI_Dist_graph_create_adjacent(comm,outdegree,destinations,dat->leafweights,indegree,sources,dat->rootweights,MPI_INFO_NULL,1/*reorder*/,mycomm);
    }
    dat->initialized[direction] = PETSC_TRUE;
  }
  *distcomm = dat->comms[direction];
  return 0;
}
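/* Note: MPI_Dist_graph_create_adjacent() preserves the order of the sources and destinations arrays,
   so the i-th count/displacement entries passed to the neighborhood collectives below line up with the
   i-th non-distinguished root/leaf rank. The reorder=1 flag allows the MPI implementation to renumber
   ranks in the new communicator if it finds a better process placement. */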
/*===================================================================================*/
/* Implementations of SF public APIs */
/*===================================================================================*/
static PetscErrorCode PetscSFSetUp_Neighbor(PetscSF sf)
{
  PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data;
  PetscInt         i,j,nrootranks,ndrootranks,nleafranks,ndleafranks;
  const PetscInt   *rootoffset,*leafoffset;
  PetscMPIInt      m,n;

  /* SFNeighbor inherits from Basic */
  PetscSFSetUp_Basic(sf);
  /* SFNeighbor specific */
  sf->persistent = PETSC_FALSE;
  PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);
  PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);
  dat->rootdegree = m = (PetscMPIInt)(nrootranks-ndrootranks);
  dat->leafdegree = n = (PetscMPIInt)(nleafranks-ndleafranks);
  sf->nleafreqs   = 0;
  dat->nrootreqs  = 1;

  /* Only setup MPI displs/counts for non-distinguished ranks. Distinguished ranks use shared memory */
  PetscMalloc6(m,&dat->rootdispls,m,&dat->rootcounts,m,&dat->rootweights,n,&dat->leafdispls,n,&dat->leafcounts,n,&dat->leafweights);
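  /* The loops below express each non-distinguished rank's chunk relative to the start of the
     non-distinguished region of the packed buffer (hence the offset subtraction), and derive edge
     weights for the graph communicator. Since MPI graph weights are plain ints, with 64-bit counts
     the weight is a value scaled proportionally into the PetscMPIInt range rather than the count itself. */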
#if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES)
  for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
    dat->rootdispls[j]  = rootoffset[i]-rootoffset[ndrootranks];
    dat->rootcounts[j]  = rootoffset[i+1]-rootoffset[i];
    dat->rootweights[j] = (PetscMPIInt)((PetscReal)dat->rootcounts[j]/(PetscReal)PETSC_MAX_INT*2147483647); /* Scale to range of PetscMPIInt */
  }

  for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
    dat->leafdispls[j]  = leafoffset[i]-leafoffset[ndleafranks];
    dat->leafcounts[j]  = leafoffset[i+1]-leafoffset[i];
    dat->leafweights[j] = (PetscMPIInt)((PetscReal)dat->leafcounts[j]/(PetscReal)PETSC_MAX_INT*2147483647);
  }
#else
  for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
    PetscMPIIntCast(rootoffset[i]-rootoffset[ndrootranks],&m); dat->rootdispls[j] = m;
    PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);         dat->rootcounts[j] = n;
    dat->rootweights[j] = n;
  }

  for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
    PetscMPIIntCast(leafoffset[i]-leafoffset[ndleafranks],&m); dat->leafdispls[j] = m;
    PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);         dat->leafcounts[j] = n;
    dat->leafweights[j] = n;
  }
#endif
  return 0;
}
static PetscErrorCode PetscSFReset_Neighbor(PetscSF sf)
{
  PetscInt         i;
  PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data;

  PetscFree6(dat->rootdispls,dat->rootcounts,dat->rootweights,dat->leafdispls,dat->leafcounts,dat->leafweights);
  for (i=0; i<2; i++) {
    if (dat->initialized[i]) {
      MPI_Comm_free(&dat->comms[i]);
      dat->initialized[i] = PETSC_FALSE;
    }
  }
  PetscSFReset_Basic(sf); /* Common part */
  return 0;
}
static PetscErrorCode PetscSFDestroy_Neighbor(PetscSF sf)
{
  PetscSFReset_Neighbor(sf);
  PetscFree(sf->data);
  return 0;
}
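/* BcastBegin packs root data into the link's root buffer, starts a nonblocking neighborhood alltoallv
   on the root-to-leaf graph communicator, and handles the local (self) part; the matching wait and
   unpack are done by PetscSFBcastEnd_Basic, which this type reuses (see PetscSFCreate_Neighbor). */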
static PetscErrorCode PetscSFBcastBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscSFLink      link;
  PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data;
  MPI_Comm         distcomm = MPI_COMM_NULL;
  void             *rootbuf = NULL,*leafbuf = NULL;
  MPI_Request      *req;

  PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link);
  PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata);
  /* Do neighborhood alltoallv for remote ranks */
  PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */);
  PetscSFGetDistComm_Neighbor(sf,PETSCSF_ROOT2LEAF,&distcomm);
  PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,&rootbuf,&leafbuf,&req,NULL);
  PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_ROOT2LEAF);
  /* OpenMPI-3.0 ran into error with rootdegree = leafdegree = 0, so we skip the call in this case */
  if (dat->rootdegree || dat->leafdegree) {
    MPIU_Ineighbor_alltoallv(rootbuf,dat->rootcounts,dat->rootdispls,unit,leafbuf,dat->leafcounts,dat->leafdispls,unit,distcomm,req);
  }
  PetscLogMPIMessages(dat->rootdegree,dat->rootcounts,unit,dat->leafdegree,dat->leafcounts,unit);
  PetscSFLinkScatterLocal(sf,link,PETSCSF_ROOT2LEAF,(void*)rootdata,leafdata,op);
  return 0;
}
static inline PetscErrorCode PetscSFLeafToRootBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op,PetscSFOperation sfop,PetscSFLink *out)
{
  PetscSFLink      link;
  PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data;
  MPI_Comm         distcomm = MPI_COMM_NULL;
  void             *rootbuf = NULL,*leafbuf = NULL;
  MPI_Request      *req = NULL;

  PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,sfop,&link);
  PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata);
  /* Do neighborhood alltoallv for remote ranks */
  PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */);
  PetscSFGetDistComm_Neighbor(sf,PETSCSF_LEAF2ROOT,&distcomm);
  PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_LEAF2ROOT,&rootbuf,&leafbuf,&req,NULL);
  PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_LEAF2ROOT);
  if (dat->rootdegree || dat->leafdegree) {
    MPIU_Ineighbor_alltoallv(leafbuf,dat->leafcounts,dat->leafdispls,unit,rootbuf,dat->rootcounts,dat->rootdispls,unit,distcomm,req);
  }
  PetscLogMPIMessages(dat->leafdegree,dat->leafcounts,unit,dat->rootdegree,dat->rootcounts,unit);
  *out = link;
  return 0;
}
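/* ReduceBegin and FetchAndOpBegin below share the leaf-to-root start above; they differ only in the
   SF operation tag recorded on the link and in how the local (self-to-self) part is applied. */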
static PetscErrorCode PetscSFReduceBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscSFLeafToRootBegin_Neighbor(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_REDUCE,&link);
  PetscSFLinkScatterLocal(sf,link,PETSCSF_LEAF2ROOT,rootdata,(void*)leafdata,op);
  return 0;
}
static PetscErrorCode PetscSFFetchAndOpBegin_Neighbor(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscSFLeafToRootBegin_Neighbor(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_FETCH,&link);
  PetscSFLinkFetchAndOpLocal(sf,link,rootdata,leafdata,leafupdate,op);
  return 0;
}
static PetscErrorCode PetscSFFetchAndOpEnd_Neighbor(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscSFLink      link = NULL;
  MPI_Comm         comm = MPI_COMM_NULL;
  PetscSF_Neighbor *dat = (PetscSF_Neighbor*)sf->data;
  void             *rootbuf = NULL,*leafbuf = NULL;

  PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);
  PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT);
  /* Process remote fetch-and-op */
  PetscSFLinkFetchAndOpRemote(sf,link,rootdata,op);
  /* Bcast the updated rootbuf back to leaves */
  PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */);
  PetscSFGetDistComm_Neighbor(sf,PETSCSF_ROOT2LEAF,&comm);
  PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,&rootbuf,&leafbuf,NULL,NULL);
  PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_ROOT2LEAF);
  if (dat->rootdegree || dat->leafdegree) {
    MPIU_Neighbor_alltoallv(rootbuf,dat->rootcounts,dat->rootdispls,unit,leafbuf,dat->leafcounts,dat->leafdispls,unit,comm);
  }
  PetscLogMPIMessages(dat->rootdegree,dat->rootcounts,unit,dat->leafdegree,dat->leafcounts,unit);
  PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_FALSE/* host2device after recving */);
  PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafupdate,MPI_REPLACE);
  PetscSFLinkReclaim(sf,&link);
  return 0;
}
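/* The return leg of fetch-and-op above uses the blocking MPIU_Neighbor_alltoallv, since the old root
   values must be back in the leaf buffer before they are unpacked into leafupdate within this same
   call, leaving nothing to overlap with a nonblocking start/end pair. */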
PETSC_INTERN PetscErrorCode PetscSFCreate_Neighbor(PetscSF sf)
{
  PetscSF_Neighbor *dat;

  sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
  sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
  sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
  sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
  sf->ops->View                 = PetscSFView_Basic;

  sf->ops->SetUp                = PetscSFSetUp_Neighbor;
  sf->ops->Reset                = PetscSFReset_Neighbor;
  sf->ops->Destroy              = PetscSFDestroy_Neighbor;
  sf->ops->BcastBegin           = PetscSFBcastBegin_Neighbor;
  sf->ops->ReduceBegin          = PetscSFReduceBegin_Neighbor;
  sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Neighbor;
  sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Neighbor;

  PetscNewLog(sf,&dat);
  sf->data = (void*)dat;
  return 0;
}
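/* A minimal usage sketch (not part of this file): assuming a PetscSF has already been given its graph
   with PetscSFSetGraph(), the neighborhood-collective implementation above is selected by setting the
   SF type to PETSCSFNEIGHBOR (or -sf_type neighbor on the command line) before the first communication:

     PetscSFSetType(sf,PETSCSFNEIGHBOR);
     PetscSFSetUp(sf);                                                  // builds counts/displs; graph comms are built lazily
     PetscSFBcastBegin(sf,MPIU_SCALAR,rootdata,leafdata,MPI_REPLACE);   // uses MPIU_Ineighbor_alltoallv internally
     PetscSFBcastEnd(sf,MPIU_SCALAR,rootdata,leafdata,MPI_REPLACE);
*/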