Actual source code: sfbasic.c
petsc-3.12.5 2020-03-29
2: #include <../src/vec/is/sf/impls/basic/sfbasic.h>
4: /*===================================================================================*/
5: /* Internal routines for PetscSFPack */
6: /*===================================================================================*/
8: /* Return root and leaf MPI requests for communication in the given direction. If the requests have not been
9: initialized (since we use persistent requests), then initialize them.
10: */
11: static PetscErrorCode PetscSFPackGetReqs_Basic(PetscSF sf,PetscSFPack link,PetscSFDirection direction,MPI_Request **rootreqs,MPI_Request **leafreqs)
12: {
14: PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
15: PetscInt i,j,nrootranks,ndrootranks,nleafranks,ndleafranks;
16: const PetscInt *rootoffset,*leafoffset;
17: PetscMPIInt n;
18: MPI_Comm comm = PetscObjectComm((PetscObject)sf);
19: MPI_Datatype unit = link->unit;
20: PetscMemType rootmtype = link->rootmtype,leafmtype = link->leafmtype;
23: if (rootreqs && !link->rootreqsinited[direction][rootmtype]) {
24: PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);
25: if (direction == PETSCSF_LEAF2../../../../../.._REDUCE) {
26: for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
27: MPI_Aint disp = (rootoffset[i] - rootoffset[ndrootranks])*link->unitbytes;
28: PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);
29: MPI_Recv_init(link->rootbuf[rootmtype]+disp,n,unit,bas->iranks[i],link->tag,comm,&link->rootreqs[direction][rootmtype][j]);
30: }
31: } else if (direction == PETSCSF_../../../../../..2LEAF_BCAST) {
32: for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
33: MPI_Aint disp = (rootoffset[i] - rootoffset[ndrootranks])*link->unitbytes;
34: PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);
35: MPI_Send_init(link->rootbuf[rootmtype]+disp,n,unit,bas->iranks[i],link->tag,comm,&link->rootreqs[direction][rootmtype][j]);
36: }
37: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %d\n",(int)direction);
38: link->rootreqsinited[direction][rootmtype] = PETSC_TRUE;
39: }
41: if (leafreqs && !link->leafreqsinited[direction][leafmtype]) {
42: PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);
43: if (direction == PETSCSF_LEAF2../../../../../.._REDUCE) {
44: for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
45: MPI_Aint disp = (leafoffset[i] - leafoffset[ndleafranks])*link->unitbytes;
46: PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);
47: MPI_Send_init(link->leafbuf[leafmtype]+disp,n,unit,sf->ranks[i],link->tag,comm,&link->leafreqs[direction][leafmtype][j]);
48: }
49: } else if (direction == PETSCSF_../../../../../..2LEAF_BCAST) {
50: for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
51: MPI_Aint disp = (leafoffset[i] - leafoffset[ndleafranks])*link->unitbytes;
52: PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);
53: MPI_Recv_init(link->leafbuf[leafmtype]+disp,n,unit,sf->ranks[i],link->tag,comm,&link->leafreqs[direction][leafmtype][j]);
54: }
55: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %d\n",(int)direction);
56: link->leafreqsinited[direction][leafmtype] = PETSC_TRUE;
57: }
59: if (rootreqs) *rootreqs = link->rootreqs[direction][rootmtype];
60: if (leafreqs) *leafreqs = link->leafreqs[direction][leafmtype];
61: return(0);
62: }
64: /* Common part shared by SFBasic and SFNeighbor based on the fact they all deal with sparse graphs. */
65: PETSC_INTERN PetscErrorCode PetscSFPackGet_Basic_Common(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscInt nrootreqs,PetscInt nleafreqs,PetscSFPack *mylink)
66: {
67: PetscErrorCode ierr;
68: PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
69: PetscInt i,j,nreqs,nrootranks,ndrootranks,nleafranks,ndleafranks;
70: const PetscInt *rootoffset,*leafoffset;
71: PetscSFPack *p,link;
72: PetscBool match;
75: PetscSFPackSetErrorOnUnsupportedOverlap(sf,unit,rootdata,leafdata);
77: /* Look for types in cache */
78: for (p=&bas->avail; (link=*p); p=&link->next) {
79: MPIPetsc_Type_compare(unit,link->unit,&match);
80: if (match) {
81: *p = link->next; /* Remove from available list */
82: goto found;
83: }
84: }
86: PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);
87: PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);
88: PetscNew(&link);
89: PetscSFPackSetUp_Host(sf,link,unit);
90: PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag); /* One tag per link */
92: /* Allocate root, leaf, self buffers, and MPI requests */
93: link->rootbuflen = rootoffset[nrootranks]-rootoffset[ndrootranks];
94: link->leafbuflen = leafoffset[nleafranks]-leafoffset[ndleafranks];
95: link->selfbuflen = rootoffset[ndrootranks];
96: link->nrootreqs = nrootreqs;
97: link->nleafreqs = nleafreqs;
98: nreqs = (nrootreqs+nleafreqs)*4; /* Quadruple the requests since there are two communication directions and two memory types */
99: PetscMalloc1(nreqs,&link->reqs);
100: for (i=0; i<nreqs; i++) link->reqs[i] = MPI_REQUEST_NULL; /* Initialized to NULL so that we know which need to be freed in Destroy */
102: for (i=0; i<2; i++) { /* Two communication directions */
103: for (j=0; j<2; j++) { /* Two memory types */
104: link->rootreqs[i][j] = link->reqs + nrootreqs*(2*i+j);
105: link->leafreqs[i][j] = link->reqs + nrootreqs*4 + nleafreqs*(2*i+j);
106: }
107: }
109: found:
110: link->rootmtype = rootmtype;
111: link->leafmtype = leafmtype;
112: #if defined(PETSC_HAVE_CUDA)
113: if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {PetscSFPackSetUp_Device(sf,link,unit);}
114: #endif
115: if (!link->rootbuf[rootmtype]) {PetscMallocWithMemType(rootmtype,link->rootbuflen*link->unitbytes,(void**)&link->rootbuf[rootmtype]);}
116: if (!link->leafbuf[leafmtype]) {PetscMallocWithMemType(leafmtype,link->leafbuflen*link->unitbytes,(void**)&link->leafbuf[leafmtype]);}
117: if (!link->selfbuf[rootmtype]) {PetscMallocWithMemType(rootmtype,link->selfbuflen*link->unitbytes,(void**)&link->selfbuf[rootmtype]);}
118: if (rootmtype != leafmtype && !link->selfbuf[leafmtype]) {PetscMallocWithMemType(leafmtype,link->selfbuflen*link->unitbytes,(void**)&link->selfbuf[leafmtype]);}
119: link->rkey = rootdata;
120: link->lkey = leafdata;
121: link->next = bas->inuse;
122: bas->inuse = link;
124: *mylink = link;
125: return(0);
126: }
128: static PetscErrorCode PetscSFPackGet_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscSFDirection direction,PetscSFPack *mylink)
129: {
130: PetscErrorCode ierr;
131: PetscInt nrootranks,ndrootranks,nleafranks,ndleafranks;
134: PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,NULL,NULL);
135: PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,NULL,NULL,NULL);
136: PetscSFPackGet_Basic_Common(sf,unit,rootmtype,rootdata,leafmtype,leafdata,nrootranks-ndrootranks,nleafranks-ndleafranks,mylink);
137: return(0);
138: }
140: /*===================================================================================*/
141: /* SF public interface implementations */
142: /*===================================================================================*/
143: PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
144: {
146: PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
147: PetscInt *rlengths,*ilengths,i;
148: PetscMPIInt rank,niranks,*iranks,tag;
149: MPI_Comm comm;
150: MPI_Group group;
151: MPI_Request *rootreqs,*leafreqs;
154: MPI_Comm_group(PETSC_COMM_SELF,&group);
155: PetscSFSetUpRanks(sf,group);
156: MPI_Group_free(&group);
157: PetscObjectGetComm((PetscObject)sf,&comm);
158: PetscObjectGetNewTag((PetscObject)sf,&tag);
159: MPI_Comm_rank(comm,&rank);
160: /*
161: * Inform roots about how many leaves and from which ranks
162: */
163: PetscMalloc1(sf->nranks,&rlengths);
164: /* Determine number, sending ranks, and length of incoming */
165: for (i=0; i<sf->nranks; i++) {
166: rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
167: }
168: PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);
170: /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why.
171: We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
172: small and the sorting is cheap.
173: */
174: PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);
176: /* Partition into distinguished and non-distinguished incoming ranks */
177: bas->ndiranks = sf->ndranks;
178: bas->niranks = bas->ndiranks + niranks;
179: PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);
180: bas->ioffset[0] = 0;
181: for (i=0; i<bas->ndiranks; i++) {
182: bas->iranks[i] = sf->ranks[i];
183: bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
184: }
185: if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
186: for ( ; i<bas->niranks; i++) {
187: bas->iranks[i] = iranks[i-bas->ndiranks];
188: bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
189: }
190: bas->itotal = bas->ioffset[i];
191: PetscFree(rlengths);
192: PetscFree(iranks);
193: PetscFree(ilengths);
195: /* Send leaf identities to roots */
196: PetscMalloc1(bas->itotal,&bas->irootloc);
197: PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);
198: for (i=bas->ndiranks; i<bas->niranks; i++) {
199: MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);
200: }
201: for (i=0; i<sf->nranks; i++) {
202: PetscMPIInt npoints;
203: PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);
204: if (i < sf->ndranks) {
205: if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
206: if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
207: if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
208: PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);
209: continue;
210: }
211: MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);
212: }
213: MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);
214: MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);
215: PetscFree2(rootreqs,leafreqs);
217: sf->selfleafdups = PETSC_TRUE; /* The conservative assumption is there are data race */
218: sf->remoteleafdups = PETSC_TRUE;
219: bas->selfrootdups = PETSC_TRUE;
220: bas->remoterootdups = PETSC_TRUE;
222: /* Setup packing optimization for roots and leaves */
223: PetscSFPackSetupOptimizations_Basic(sf);
224: return(0);
225: }
227: static PetscErrorCode PetscSFSetFromOptions_Basic(PetscOptionItems *PetscOptionsObject,PetscSF sf)
228: {
232: PetscOptionsHead(PetscOptionsObject,"PetscSF Basic options");
233: PetscOptionsTail();
234: return(0);
235: }
237: PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
238: {
239: PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
240: PetscErrorCode ierr;
243: if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
244: PetscFree2(bas->iranks,bas->ioffset);
245: PetscFree(bas->irootloc);
246: #if defined(PETSC_HAVE_CUDA)
247: if (bas->irootloc_d) {cudaError_t err = cudaFree(bas->irootloc_d);CHKERRCUDA(err);bas->irootloc_d=NULL;}
248: #endif
249: PetscSFPackDestroyAvailable(&bas->avail);
250: PetscSFPackDestroyOptimizations_Basic(sf);
251: return(0);
252: }
254: PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
255: {
259: PetscSFReset(sf);
260: PetscFree(sf->data);
261: return(0);
262: }
264: PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
265: {
267: PetscBool iascii;
270: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
271: if (iascii) {PetscViewerASCIIPrintf(viewer," sort=%s\n",sf->rankorder ? "rank-order" : "unordered");}
272: return(0);
273: }
275: static PetscErrorCode PetscSFBcastAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
276: {
277: PetscErrorCode ierr;
278: PetscSFPack link;
279: const PetscInt *rootloc = NULL;
280: MPI_Request *rootreqs = NULL,*leafreqs = NULL;
283: PetscSFPackGet_Basic(sf,unit,rootmtype,rootdata,leafmtype,leafdata,PETSCSF_../../../../../..2LEAF_BCAST,&link);
284: PetscSFGetRootIndicesWithMemType_Basic(sf,rootmtype,&rootloc);
286: PetscSFPackGetReqs_Basic(sf,link,PETSCSF_../../../../../..2LEAF_BCAST,&rootreqs,&leafreqs);
287: /* Post Irecv. Note distinguished ranks receive data via shared memory (i.e., not via MPI) */
288: MPI_Startall_irecv(link->leafbuflen,unit,link->nleafreqs,leafreqs);
290: /* Do Isend */
291: PetscSFPackRootData(sf,link,rootloc,rootdata,PETSC_TRUE);
292: MPI_Startall_isend(link->rootbuflen,unit,link->nrootreqs,rootreqs);
294: /* Do self to self communication via memcpy only when rootdata and leafdata are in different memory */
295: if (rootmtype != leafmtype) {PetscMemcpyWithMemType(leafmtype,rootmtype,link->selfbuf[leafmtype],link->selfbuf[rootmtype],link->selfbuflen*link->unitbytes);}
296: return(0);
297: }
299: PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
300: {
301: PetscErrorCode ierr;
302: PetscSFPack link;
303: const PetscInt *leafloc = NULL;
306: PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);
307: PetscSFPackWaitall_Basic(link,PETSCSF_../../../../../..2LEAF_BCAST);
308: PetscSFGetLeafIndicesWithMemType_Basic(sf,leafmtype,&leafloc);
309: PetscSFUnpackAndOpLeafData(sf,link,leafloc,leafdata,op,PETSC_TRUE);
310: PetscSFPackReclaim(sf,&link);
311: return(0);
312: }
314: /* leaf -> root with reduction */
315: static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
316: {
317: PetscErrorCode ierr;
318: PetscSFPack link;
319: const PetscInt *leafloc = NULL;
320: MPI_Request *rootreqs = NULL,*leafreqs = NULL; /* dummy values for compiler warnings about uninitialized value */
323: PetscSFGetLeafIndicesWithMemType_Basic(sf,leafmtype,&leafloc);
325: PetscSFPackGet_Basic(sf,unit,rootmtype,rootdata,leafmtype,leafdata,PETSCSF_LEAF2../../../../../.._REDUCE,&link);
326: PetscSFPackGetReqs_Basic(sf,link,PETSCSF_LEAF2../../../../../.._REDUCE,&rootreqs,&leafreqs);
327: /* Eagerly post root receives for non-distinguished ranks */
328: MPI_Startall_irecv(link->rootbuflen,unit,link->nrootreqs,rootreqs);
330: /* Pack and send leaf data */
331: PetscSFPackLeafData(sf,link,leafloc,leafdata,PETSC_TRUE);
332: MPI_Startall_isend(link->leafbuflen,unit,link->nleafreqs,leafreqs);
334: if (rootmtype != leafmtype) {PetscMemcpyWithMemType(rootmtype,leafmtype,link->selfbuf[rootmtype],link->selfbuf[leafmtype],link->selfbuflen*link->unitbytes);}
335: return(0);
336: }
338: PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
339: {
340: PetscErrorCode ierr;
341: PetscSFPack link;
342: const PetscInt *rootloc = NULL;
345: PetscSFGetRootIndicesWithMemType_Basic(sf,rootmtype,&rootloc);
346: PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);
347: PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2../../../../../.._REDUCE);
348: PetscSFUnpackAndOpRootData(sf,link,rootloc,rootdata,op,PETSC_TRUE);
349: PetscSFPackReclaim(sf,&link);
350: return(0);
351: }
353: PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
354: {
358: PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);
359: return(0);
360: }
362: static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
363: {
364: PetscErrorCode ierr;
365: PetscSFPack link;
366: const PetscInt *rootloc = NULL,*leafloc = NULL;
367: MPI_Request *rootreqs = NULL,*leafreqs = NULL;
370: PetscSFGetRootIndicesWithMemType_Basic(sf,rootmtype,&rootloc);
371: PetscSFGetLeafIndicesWithMemType_Basic(sf,leafmtype,&leafloc);
372: PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);
373: /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
374: PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2../../../../../.._REDUCE);
375: PetscSFPackGetReqs_Basic(sf,link,PETSCSF_../../../../../..2LEAF_BCAST,&rootreqs,&leafreqs);
377: /* Post leaf receives */
378: MPI_Startall_irecv(link->leafbuflen,unit,link->nleafreqs,leafreqs);
380: /* Process local fetch-and-op, post root sends */
381: PetscSFFetchAndOpRootData(sf,link,rootloc,rootdata,op,PETSC_TRUE);
382: MPI_Startall_isend(link->rootbuflen,unit,link->nrootreqs,rootreqs);
383: if (rootmtype != leafmtype) {PetscMemcpyWithMemType(leafmtype,rootmtype,link->selfbuf[leafmtype],link->selfbuf[rootmtype],link->selfbuflen*link->unitbytes);}
385: /* Unpack and insert fetched data into leaves */
386: PetscSFPackWaitall_Basic(link,PETSCSF_../../../../../..2LEAF_BCAST);
387: PetscSFUnpackAndOpLeafData(sf,link,leafloc,leafupdate,MPIU_REPLACE,PETSC_TRUE);
388: PetscSFPackReclaim(sf,&link);
389: return(0);
390: }
392: PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
393: {
394: PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
397: if (niranks) *niranks = bas->niranks;
398: if (iranks) *iranks = bas->iranks;
399: if (ioffset) *ioffset = bas->ioffset;
400: if (irootloc) *irootloc = bas->irootloc;
401: return(0);
402: }
404: /* An optimized PetscSFCreateEmbeddedSF. We aggresively make use of the established communication on sf.
405: We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[]
406: was sorted before calling the routine.
407: */
408: PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
409: {
410: PetscSF esf;
411: PetscInt esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote,count;
412: PetscInt i,j,k,p,q,nroots,*rootdata,*leafdata,connected_leaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal;
413: PetscMPIInt *esf_ranks;
414: const PetscMPIInt *ranks,*iranks;
415: const PetscInt *roffset,*rmine,*rremote,*ioffset,*irootloc,*buffer;
416: PetscBool connected;
417: PetscSFPack link;
418: PetscSFNode *new_iremote;
419: PetscSF_Basic *bas;
420: PetscErrorCode ierr;
423: PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);
424: PetscSFSetType(esf,PETSCSFBASIC); /* This optimized routine can only create a basic sf */
426: /* Find out which leaves are still connected to roots in the embedded sf */
427: PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
428: PetscSFGetLeafRange(sf,&minleaf,&maxleaf);
429: /* We abused the term leafdata here, whose size is usually the number of leaf data items. Here its size is # of leaves (always >= # of leaf data items) */
430: maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
431: PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);
432: /* Tag selected roots */
433: for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1;
435: /* Bcast from roots to leaves to tag connected leaves. We reuse the established bcast communication in
436: sf but do not do unpacking (from leaf buffer to leafdata). The raw data in leaf buffer is what we are
437: interested in since it tells which leaves are connected to which ranks.
438: */
439: PetscSFBcastAndOpBegin_Basic(sf,MPIU_INT,PETSC_MEMTYPE_HOST,rootdata,PETSC_MEMTYPE_HOST,leafdata-minleaf,MPIU_REPLACE); /* Need to give leafdata but we won't use it */
440: PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,&link);
441: PetscSFPackWaitall_Basic(link,PETSCSF_../../../../../..2LEAF_BCAST);
442: PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote); /* Get send info */
443: esf_nranks = esf_ndranks = connected_leaves = 0;
444: for (i=0; i<nranks; i++) {
445: connected = PETSC_FALSE; /* Is the current process still connected to this remote root rank? */
446: buffer = i < ndranks? (PetscInt*)link->selfbuf[PETSC_MEMTYPE_HOST] : (PetscInt*)link->leafbuf[PETSC_MEMTYPE_HOST] + (roffset[i] - roffset[ndranks]);
447: count = roffset[i+1] - roffset[i];
448: for (j=0; j<count; j++) {if (buffer[j]) {connected_leaves++; connected = PETSC_TRUE;}}
449: if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;}
450: }
452: /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
453: PetscMalloc1(connected_leaves,&new_ilocal);
454: PetscMalloc1(connected_leaves,&new_iremote);
455: PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,connected_leaves,&esf_rmine,connected_leaves,&esf_rremote);
456: p = 0; /* Counter for connected root ranks */
457: q = 0; /* Counter for connected leaves */
458: esf_roffset[0] = 0;
459: for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */
460: buffer = i < ndranks? (PetscInt*)link->selfbuf[PETSC_MEMTYPE_HOST] : (PetscInt*)link->leafbuf[PETSC_MEMTYPE_HOST] + (roffset[i] - roffset[ndranks]);
461: connected = PETSC_FALSE;
462: for (j=roffset[i],k=0; j<roffset[i+1]; j++,k++) {
463: if (buffer[k]) {
464: esf_rmine[q] = new_ilocal[q] = rmine[j];
465: esf_rremote[q] = rremote[j];
466: new_iremote[q].index = rremote[j];
467: new_iremote[q].rank = ranks[i];
468: connected = PETSC_TRUE;
469: q++;
470: }
471: }
472: if (connected) {
473: esf_ranks[p] = ranks[i];
474: esf_roffset[p+1] = q;
475: p++;
476: }
477: }
479: PetscSFPackReclaim(sf,&link);
481: /* SetGraph internally resets the SF, so we only set its fields after the call */
482: PetscSFSetGraph(esf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
483: esf->nranks = esf_nranks;
484: esf->ndranks = esf_ndranks;
485: esf->ranks = esf_ranks;
486: esf->roffset = esf_roffset;
487: esf->rmine = esf_rmine;
488: esf->rremote = esf_rremote;
490: /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
491: bas = (PetscSF_Basic*)esf->data;
492: PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc); /* Get recv info */
493: /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we
494: expect these arrays are usually short, so we do not care. The benefit is we can fill these arrays by just parsing irootloc once.
495: */
496: PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);
497: PetscMalloc1(ioffset[niranks],&bas->irootloc);
498: bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
499: p = 0; /* Counter for connected leaf ranks */
500: q = 0; /* Counter for connected roots */
501: for (i=0; i<niranks; i++) {
502: connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
503: for (j=ioffset[i]; j<ioffset[i+1]; j++) {
504: PetscInt loc;
505: PetscFindInt(irootloc[j],nselected,selected,&loc);
506: if (loc >= 0) { /* Found in selected this root is connected */
507: bas->irootloc[q++] = irootloc[j];
508: connected = PETSC_TRUE;
509: }
510: }
511: if (connected) {
512: bas->niranks++;
513: if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
514: bas->iranks[p] = iranks[i];
515: bas->ioffset[p+1] = q;
516: p++;
517: }
518: }
519: bas->itotal = q;
521: /* Setup packing optimizations */
522: PetscSFPackSetupOptimizations_Basic(esf);
523: esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
525: PetscFree2(rootdata,leafdata);
526: *newsf = esf;
527: return(0);
528: }
530: PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedLeafSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
531: {
532: PetscSF esf;
533: PetscInt i,j,k,p,q,nroots,*rootdata,*leafdata,*new_ilocal,niranks,ndiranks,minleaf,maxleaf,maxlocal;
534: const PetscInt *ilocal,*ioffset,*irootloc,*buffer;
535: const PetscMPIInt *iranks;
536: PetscSFPack link;
537: PetscSFNode *new_iremote;
538: const PetscSFNode *iremote;
539: PetscSF_Basic *bas;
540: MPI_Group group;
541: PetscErrorCode ierr;
544: PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);
545: PetscSFSetType(esf,PETSCSFBASIC); /* This optimized routine can only create a basic sf */
547: /* Set the graph of esf, which is easy for CreateEmbeddedLeafSF */
548: PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);
549: PetscSFGetLeafRange(sf,&minleaf,&maxleaf);
550: PetscMalloc1(nselected,&new_ilocal);
551: PetscMalloc1(nselected,&new_iremote);
552: for (i=0; i<nselected; i++) {
553: const PetscInt l = selected[i];
554: new_ilocal[i] = ilocal ? ilocal[l] : l;
555: new_iremote[i].rank = iremote[l].rank;
556: new_iremote[i].index = iremote[l].index;
557: }
559: /* Tag selected leaves before PetscSFSetGraph since new_ilocal might turn into NULL since we use PETSC_OWN_POINTER below */
560: maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
561: PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);
562: for (i=0; i<nselected; i++) leafdata[new_ilocal[i]-minleaf] = 1; /* -minleaf to adjust indices according to minleaf */
564: PetscSFSetGraph(esf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
566: /* Set up the outgoing communication (i.e., send info). We can not reuse rmine etc in sf since there is no way to
567: map rmine[i] (ilocal of leaves) back to selected[j] (leaf indices).
568: */
569: MPI_Comm_group(PETSC_COMM_SELF,&group);
570: PetscSFSetUpRanks(esf,group);
571: MPI_Group_free(&group);
573: /* Set up the incoming communication (i.e., recv info) */
574: PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);
575: bas = (PetscSF_Basic*)esf->data;
576: PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);
577: PetscMalloc1(ioffset[niranks],&bas->irootloc);
579: /* Pass info about selected leaves to root buffer */
580: PetscSFReduceBegin_Basic(sf,MPIU_INT,PETSC_MEMTYPE_HOST,leafdata-minleaf,PETSC_MEMTYPE_HOST,rootdata,MPIU_REPLACE); /* -minleaf to re-adjust start address of leafdata */
581: PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,&link);
582: PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2../../../../../.._REDUCE);
584: bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
585: p = 0; /* Counter for connected leaf ranks */
586: q = 0; /* Counter for connected roots */
587: for (i=0; i<niranks; i++) {
588: PetscBool connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
589: buffer = i < ndiranks? (PetscInt*)link->selfbuf[PETSC_MEMTYPE_HOST] : (PetscInt*)link->rootbuf[PETSC_MEMTYPE_HOST] + (ioffset[i] - ioffset[ndiranks]);
590: for (j=ioffset[i],k=0; j<ioffset[i+1]; j++,k++) {
591: if (buffer[k]) {bas->irootloc[q++] = irootloc[j]; connected = PETSC_TRUE;}
592: }
593: if (connected) {
594: bas->niranks++;
595: if (i<ndiranks) bas->ndiranks++;
596: bas->iranks[p] = iranks[i];
597: bas->ioffset[p+1] = q;
598: p++;
599: }
600: }
601: bas->itotal = q;
602: PetscSFPackReclaim(sf,&link);
604: /* Setup packing optimizations */
605: PetscSFPackSetupOptimizations_Basic(esf);
606: esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
608: PetscFree2(rootdata,leafdata);
609: *newsf = esf;
610: return(0);
611: }
613: PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
614: {
615: PetscSF_Basic *dat;
619: sf->ops->SetUp = PetscSFSetUp_Basic;
620: sf->ops->SetFromOptions = PetscSFSetFromOptions_Basic;
621: sf->ops->Reset = PetscSFReset_Basic;
622: sf->ops->Destroy = PetscSFDestroy_Basic;
623: sf->ops->View = PetscSFView_Basic;
624: sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Basic;
625: sf->ops->BcastAndOpEnd = PetscSFBcastAndOpEnd_Basic;
626: sf->ops->ReduceBegin = PetscSFReduceBegin_Basic;
627: sf->ops->ReduceEnd = PetscSFReduceEnd_Basic;
628: sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Basic;
629: sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Basic;
630: sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Basic;
631: sf->ops->CreateEmbeddedSF = PetscSFCreateEmbeddedSF_Basic;
632: sf->ops->CreateEmbeddedLeafSF = PetscSFCreateEmbeddedLeafSF_Basic;
634: PetscNewLog(sf,&dat);
635: sf->data = (void*)dat;
636: return(0);
637: }