Actual source code: sfbasic.c

petsc-3.12.5 2020-03-29

  2:  #include <../src/vec/is/sf/impls/basic/sfbasic.h>

  4: /*===================================================================================*/
  5: /*              Internal routines for PetscSFPack                              */
  6: /*===================================================================================*/

  8: /* Return root and leaf MPI requests for communication in the given direction. If the requests have not been
  9:    initialized (since we use persistent requests), then initialize them.
 10: */
 11: static PetscErrorCode PetscSFPackGetReqs_Basic(PetscSF sf,PetscSFPack link,PetscSFDirection direction,MPI_Request **rootreqs,MPI_Request **leafreqs)
 12: {
 14:   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
 15:   PetscInt       i,j,nrootranks,ndrootranks,nleafranks,ndleafranks;
 16:   const PetscInt *rootoffset,*leafoffset;
 17:   PetscMPIInt    n;
 18:   MPI_Comm       comm = PetscObjectComm((PetscObject)sf);
 19:   MPI_Datatype   unit = link->unit;
 20:   PetscMemType   rootmtype = link->rootmtype,leafmtype = link->leafmtype;

 23:   if (rootreqs && !link->rootreqsinited[direction][rootmtype]) {
 24:     PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);
 25:     if (direction == PETSCSF_LEAF2ROOT_REDUCE) {
 26:       for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
 27:         MPI_Aint disp = (rootoffset[i] - rootoffset[ndrootranks])*link->unitbytes;
 28:         PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);
 29:         MPI_Recv_init(link->rootbuf[rootmtype]+disp,n,unit,bas->iranks[i],link->tag,comm,&link->rootreqs[direction][rootmtype][j]);
 30:       }
 31:     } else if (direction == PETSCSF_ROOT2LEAF_BCAST) {
 32:       for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
 33:         MPI_Aint disp = (rootoffset[i] - rootoffset[ndrootranks])*link->unitbytes;
 34:         PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);
 35:         MPI_Send_init(link->rootbuf[rootmtype]+disp,n,unit,bas->iranks[i],link->tag,comm,&link->rootreqs[direction][rootmtype][j]);
 36:       }
 37:     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %d\n",(int)direction);
 38:     link->rootreqsinited[direction][rootmtype] = PETSC_TRUE;
 39:   }

 41:   if (leafreqs && !link->leafreqsinited[direction][leafmtype]) {
 42:     PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);
 43:     if (direction == PETSCSF_LEAF2ROOT_REDUCE) {
 44:       for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
 45:         MPI_Aint disp = (leafoffset[i] - leafoffset[ndleafranks])*link->unitbytes;
 46:         PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);
 47:         MPI_Send_init(link->leafbuf[leafmtype]+disp,n,unit,sf->ranks[i],link->tag,comm,&link->leafreqs[direction][leafmtype][j]);
 48:       }
 49:     } else if (direction == PETSCSF_ROOT2LEAF_BCAST) {
 50:       for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
 51:         MPI_Aint disp = (leafoffset[i] - leafoffset[ndleafranks])*link->unitbytes;
 52:         PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);
 53:         MPI_Recv_init(link->leafbuf[leafmtype]+disp,n,unit,sf->ranks[i],link->tag,comm,&link->leafreqs[direction][leafmtype][j]);
 54:       }
 55:     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %d\n",(int)direction);
 56:     link->leafreqsinited[direction][leafmtype] = PETSC_TRUE;
 57:   }

 59:   if (rootreqs) *rootreqs = link->rootreqs[direction][rootmtype];
 60:   if (leafreqs) *leafreqs = link->leafreqs[direction][leafmtype];
 61:   return(0);
 62: }
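
The routine above relies on MPI persistent requests: MPI_Send_init/MPI_Recv_init create a request once, and every later round of communication only restarts it. A minimal sketch of that pattern in plain MPI, independent of PetscSF (the function and variable names below are illustrative):

#include <mpi.h>

/* Exchange n doubles with one partner rank repeatedly, reusing persistent requests */
static void persistent_exchange(MPI_Comm comm,int partner,int tag,double *sendbuf,double *recvbuf,int n,int rounds)
{
  MPI_Request reqs[2];
  int         it;

  MPI_Recv_init(recvbuf,n,MPI_DOUBLE,partner,tag,comm,&reqs[0]); /* created once ... */
  MPI_Send_init(sendbuf,n,MPI_DOUBLE,partner,tag,comm,&reqs[1]);
  for (it=0; it<rounds; it++) {
    MPI_Startall(2,reqs);                                        /* ... restarted every round */
    MPI_Waitall(2,reqs,MPI_STATUSES_IGNORE);                     /* completion keeps the requests allocated */
  }
  MPI_Request_free(&reqs[0]);                                    /* freed only when no longer needed */
  MPI_Request_free(&reqs[1]);
}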

 64: /* Common part shared by SFBasic and SFNeighbor, based on the fact that both deal with sparse graphs. */
 65: PETSC_INTERN PetscErrorCode PetscSFPackGet_Basic_Common(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscInt nrootreqs,PetscInt nleafreqs,PetscSFPack *mylink)
 66: {
 67:   PetscErrorCode    ierr;
 68:   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
 69:   PetscInt          i,j,nreqs,nrootranks,ndrootranks,nleafranks,ndleafranks;
 70:   const PetscInt    *rootoffset,*leafoffset;
 71:   PetscSFPack       *p,link;
 72:   PetscBool         match;

 75:   PetscSFPackSetErrorOnUnsupportedOverlap(sf,unit,rootdata,leafdata);

 77:   /* Look for types in cache */
 78:   for (p=&bas->avail; (link=*p); p=&link->next) {
 79:     MPIPetsc_Type_compare(unit,link->unit,&match);
 80:     if (match) {
 81:       *p = link->next; /* Remove from available list */
 82:       goto found;
 83:     }
 84:   }

 86:   PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);
 87:   PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);
 88:   PetscNew(&link);
 89:   PetscSFPackSetUp_Host(sf,link,unit);
 90:   PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag); /* One tag per link */

 92:   /* Allocate root, leaf, self buffers, and MPI requests */
 93:   link->rootbuflen = rootoffset[nrootranks]-rootoffset[ndrootranks];
 94:   link->leafbuflen = leafoffset[nleafranks]-leafoffset[ndleafranks];
 95:   link->selfbuflen = rootoffset[ndrootranks];
 96:   link->nrootreqs  = nrootreqs;
 97:   link->nleafreqs  = nleafreqs;
 98:   nreqs            = (nrootreqs+nleafreqs)*4; /* Quadruple the requests since there are two communication directions and two memory types */
 99:   PetscMalloc1(nreqs,&link->reqs);
100:   for (i=0; i<nreqs; i++) link->reqs[i] = MPI_REQUEST_NULL; /* Initialized to NULL so that we know which need to be freed in Destroy */

102:   for (i=0; i<2; i++) { /* Two communication directions */
103:     for (j=0; j<2; j++) { /* Two memory types */
104:       link->rootreqs[i][j] = link->reqs + nrootreqs*(2*i+j);
105:       link->leafreqs[i][j] = link->reqs + nrootreqs*4 + nleafreqs*(2*i+j);
106:     }
107:   }

109: found:
110:   link->rootmtype = rootmtype;
111:   link->leafmtype = leafmtype;
112: #if defined(PETSC_HAVE_CUDA)
113:   if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {PetscSFPackSetUp_Device(sf,link,unit);}
114: #endif
115:   if (!link->rootbuf[rootmtype]) {PetscMallocWithMemType(rootmtype,link->rootbuflen*link->unitbytes,(void**)&link->rootbuf[rootmtype]);}
116:   if (!link->leafbuf[leafmtype]) {PetscMallocWithMemType(leafmtype,link->leafbuflen*link->unitbytes,(void**)&link->leafbuf[leafmtype]);}
117:   if (!link->selfbuf[rootmtype]) {PetscMallocWithMemType(rootmtype,link->selfbuflen*link->unitbytes,(void**)&link->selfbuf[rootmtype]);}
118:   if (rootmtype != leafmtype && !link->selfbuf[leafmtype]) {PetscMallocWithMemType(leafmtype,link->selfbuflen*link->unitbytes,(void**)&link->selfbuf[leafmtype]);}
119:   link->rkey = rootdata;
120:   link->lkey = leafdata;
121:   link->next = bas->inuse;
122:   bas->inuse = link;

124:   *mylink    = link;
125:   return(0);
126: }
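
The pointer arithmetic above lays all persistent requests out in the single array link->reqs. A worked example of that layout with illustrative sizes nrootreqs = 3 and nleafreqs = 2, so nreqs = (3+2)*4 = 20; here i indexes the communication direction and j the memory type:

  reqs[ 0.. 2] = rootreqs[0][0] = reqs + 3*(2*0+0)
  reqs[ 3.. 5] = rootreqs[0][1] = reqs + 3*(2*0+1)
  reqs[ 6.. 8] = rootreqs[1][0] = reqs + 3*(2*1+0)
  reqs[ 9..11] = rootreqs[1][1] = reqs + 3*(2*1+1)
  reqs[12..13] = leafreqs[0][0] = reqs + 3*4 + 2*(2*0+0)
  reqs[14..15] = leafreqs[0][1] = reqs + 3*4 + 2*(2*0+1)
  reqs[16..17] = leafreqs[1][0] = reqs + 3*4 + 2*(2*1+0)
  reqs[18..19] = leafreqs[1][1] = reqs + 3*4 + 2*(2*1+1)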

128: static PetscErrorCode PetscSFPackGet_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscSFDirection direction,PetscSFPack *mylink)
129: {
130:   PetscErrorCode    ierr;
131:   PetscInt          nrootranks,ndrootranks,nleafranks,ndleafranks;

134:   PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,NULL,NULL);
135:   PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,NULL,NULL,NULL);
136:   PetscSFPackGet_Basic_Common(sf,unit,rootmtype,rootdata,leafmtype,leafdata,nrootranks-ndrootranks,nleafranks-ndleafranks,mylink);
137:   return(0);
138: }

140: /*===================================================================================*/
141: /*              SF public interface implementations                                  */
142: /*===================================================================================*/
143: PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
144: {
146:   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
147:   PetscInt       *rlengths,*ilengths,i;
148:   PetscMPIInt    rank,niranks,*iranks,tag;
149:   MPI_Comm       comm;
150:   MPI_Group      group;
151:   MPI_Request    *rootreqs,*leafreqs;

154:   MPI_Comm_group(PETSC_COMM_SELF,&group);
155:   PetscSFSetUpRanks(sf,group);
156:   MPI_Group_free(&group);
157:   PetscObjectGetComm((PetscObject)sf,&comm);
158:   PetscObjectGetNewTag((PetscObject)sf,&tag);
159:   MPI_Comm_rank(comm,&rank);
160:   /*
161:    * Inform roots about how many leaves and from which ranks
162:    */
163:   PetscMalloc1(sf->nranks,&rlengths);
164:   /* Determine number, sending ranks, and length of incoming */
165:   for (i=0; i<sf->nranks; i++) {
166:     rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
167:   }
168:   PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);

170:   /* Sort iranks. See the use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() for why.
171:      We could sort the ranks there instead, at the price of allocating extra working arrays. Presumably niranks
172:      is small, so the sorting here is cheap.
173:    */
174:   PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);

176:   /* Partition into distinguished and non-distinguished incoming ranks */
177:   bas->ndiranks = sf->ndranks;
178:   bas->niranks = bas->ndiranks + niranks;
179:   PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);
180:   bas->ioffset[0] = 0;
181:   for (i=0; i<bas->ndiranks; i++) {
182:     bas->iranks[i] = sf->ranks[i];
183:     bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
184:   }
185:   if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
186:   for ( ; i<bas->niranks; i++) {
187:     bas->iranks[i] = iranks[i-bas->ndiranks];
188:     bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
189:   }
190:   bas->itotal = bas->ioffset[i];
191:   PetscFree(rlengths);
192:   PetscFree(iranks);
193:   PetscFree(ilengths);

195:   /* Send leaf identities to roots */
196:   PetscMalloc1(bas->itotal,&bas->irootloc);
197:   PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);
198:   for (i=bas->ndiranks; i<bas->niranks; i++) {
199:     MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);
200:   }
201:   for (i=0; i<sf->nranks; i++) {
202:     PetscMPIInt npoints;
203:     PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);
204:     if (i < sf->ndranks) {
205:       if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
206:       if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
207:       if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
208:       PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);
209:       continue;
210:     }
211:     MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);
212:   }
213:   MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);
214:   MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);
215:   PetscFree2(rootreqs,leafreqs);

217:   sf->selfleafdups    = PETSC_TRUE; /* The conservative assumption is that there are data races */
218:   sf->remoteleafdups  = PETSC_TRUE;
219:   bas->selfrootdups   = PETSC_TRUE;
220:   bas->remoterootdups = PETSC_TRUE;

222:   /* Setup packing optimization for roots and leaves */
223:   PetscSFPackSetupOptimizations_Basic(sf);
224:   return(0);
225: }
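
PetscSFSetUp_Basic leaves the incoming (leaf-rank) information in a CSR-like layout: the local root indices referenced by leaf rank bas->iranks[i] sit in bas->irootloc[bas->ioffset[i]..bas->ioffset[i+1]). A sketch of walking that layout through the public accessor PetscSFGetLeafRanks (the helper below is illustrative, not part of PETSc):

#include <petscsf.h>

/* Print, for an already set-up SF, which leaf ranks reference which of my roots */
static PetscErrorCode DumpLeafRanks(PetscSF sf)
{
  PetscErrorCode    ierr;
  PetscInt          niranks,i,j;
  const PetscMPIInt *iranks;
  const PetscInt    *ioffset,*irootloc;

  ierr = PetscSFGetLeafRanks(sf,&niranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr);
  for (i=0; i<niranks; i++) {                  /* one CSR row per incoming rank */
    ierr = PetscPrintf(PETSC_COMM_SELF,"leaf rank %d references %D of my roots\n",iranks[i],ioffset[i+1]-ioffset[i]);CHKERRQ(ierr);
    for (j=ioffset[i]; j<ioffset[i+1]; j++) {  /* local indices of those roots */
      ierr = PetscPrintf(PETSC_COMM_SELF,"  root %D\n",irootloc[j]);CHKERRQ(ierr);
    }
  }
  return 0;
}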

227: static PetscErrorCode PetscSFSetFromOptions_Basic(PetscOptionItems *PetscOptionsObject,PetscSF sf)
228: {

232:   PetscOptionsHead(PetscOptionsObject,"PetscSF Basic options");
233:   PetscOptionsTail();
234:   return(0);
235: }

237: PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
238: {
239:   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
240:   PetscErrorCode    ierr;

243:   if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
244:   PetscFree2(bas->iranks,bas->ioffset);
245:   PetscFree(bas->irootloc);
246: #if defined(PETSC_HAVE_CUDA)
247:   if (bas->irootloc_d) {cudaError_t err = cudaFree(bas->irootloc_d);CHKERRCUDA(err);bas->irootloc_d=NULL;}
248: #endif
249:   PetscSFPackDestroyAvailable(&bas->avail);
250:   PetscSFPackDestroyOptimizations_Basic(sf);
251:   return(0);
252: }

254: PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
255: {

259:   PetscSFReset(sf);
260:   PetscFree(sf->data);
261:   return(0);
262: }

264: PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
265: {
267:   PetscBool      iascii;

270:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
271:   if (iascii) {PetscViewerASCIIPrintf(viewer,"  sort=%s\n",sf->rankorder ? "rank-order" : "unordered");}
272:   return(0);
273: }

275: static PetscErrorCode PetscSFBcastAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
276: {
277:   PetscErrorCode    ierr;
278:   PetscSFPack       link;
279:   const PetscInt    *rootloc = NULL;
280:   MPI_Request       *rootreqs = NULL,*leafreqs = NULL;

283:   PetscSFPackGet_Basic(sf,unit,rootmtype,rootdata,leafmtype,leafdata,PETSCSF_ROOT2LEAF_BCAST,&link);
284:   PetscSFGetRootIndicesWithMemType_Basic(sf,rootmtype,&rootloc);

286:   PetscSFPackGetReqs_Basic(sf,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);
287:   /* Post Irecv. Note distinguished ranks receive data via shared memory (i.e., not via MPI) */
288:   MPI_Startall_irecv(link->leafbuflen,unit,link->nleafreqs,leafreqs);

290:   /* Do Isend */
291:   PetscSFPackRootData(sf,link,rootloc,rootdata,PETSC_TRUE);
292:   MPI_Startall_isend(link->rootbuflen,unit,link->nrootreqs,rootreqs);

294:   /* Do self to self communication via memcpy only when rootdata and leafdata are in different memory */
295:   if (rootmtype != leafmtype) {PetscMemcpyWithMemType(leafmtype,rootmtype,link->selfbuf[leafmtype],link->selfbuf[rootmtype],link->selfbuflen*link->unitbytes);}
296:   return(0);
297: }

299: PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
300: {
301:   PetscErrorCode    ierr;
302:   PetscSFPack       link;
303:   const PetscInt    *leafloc = NULL;

306:   PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);
307:   PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);
308:   PetscSFGetLeafIndicesWithMemType_Basic(sf,leafmtype,&leafloc);
309:   PetscSFUnpackAndOpLeafData(sf,link,leafloc,leafdata,op,PETSC_TRUE);
310:   PetscSFPackReclaim(sf,&link);
311:   return(0);
312: }
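
The begin/end pair above backs the public two-phase broadcast API. A hedged usage sketch follows; NROOTS and NLEAVES are placeholders, and the usual PetscErrorCode ierr declaration is assumed:

  PetscInt rootdata[NROOTS];   /* values owned by this rank                      */
  PetscInt leafdata[NLEAVES];  /* receives copies of the referenced remote roots */

  ierr = PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
  /* ... independent local work can overlap the communication posted above ... */
  ierr = PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);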

314: /* leaf -> root with reduction */
315: static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
316: {
317:   PetscErrorCode    ierr;
318:   PetscSFPack       link;
319:   const PetscInt    *leafloc = NULL;
320:   MPI_Request       *rootreqs = NULL,*leafreqs = NULL; /* NULL so the compiler does not warn about uninitialized values */

323:   PetscSFGetLeafIndicesWithMemType_Basic(sf,leafmtype,&leafloc);

325:   PetscSFPackGet_Basic(sf,unit,rootmtype,rootdata,leafmtype,leafdata,PETSCSF_LEAF2ROOT_REDUCE,&link);
326:   PetscSFPackGetReqs_Basic(sf,link,PETSCSF_LEAF2ROOT_REDUCE,&rootreqs,&leafreqs);
327:   /* Eagerly post root receives for non-distinguished ranks */
328:   MPI_Startall_irecv(link->rootbuflen,unit,link->nrootreqs,rootreqs);

330:   /* Pack and send leaf data */
331:   PetscSFPackLeafData(sf,link,leafloc,leafdata,PETSC_TRUE);
332:   MPI_Startall_isend(link->leafbuflen,unit,link->nleafreqs,leafreqs);

334:   if (rootmtype != leafmtype) {PetscMemcpyWithMemType(rootmtype,leafmtype,link->selfbuf[rootmtype],link->selfbuf[leafmtype],link->selfbuflen*link->unitbytes);}
335:   return(0);
336: }

338: PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
339: {
340:   PetscErrorCode    ierr;
341:   PetscSFPack       link;
342:   const PetscInt    *rootloc = NULL;

345:   PetscSFGetRootIndicesWithMemType_Basic(sf,rootmtype,&rootloc);
346:   PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);
347:   PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);
348:   PetscSFUnpackAndOpRootData(sf,link,rootloc,rootdata,op,PETSC_TRUE);
349:   PetscSFPackReclaim(sf,&link);
350:   return(0);
351: }
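
The reduction direction reverses the broadcast's argument order: leaf data is the source and root data the destination. A hedged usage sketch with the same illustrative buffers as above, summing leaf contributions into the owning roots:

  ierr = PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);CHKERRQ(ierr);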

353: PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
354: {

358:   PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);
359:   return(0);
360: }

362: static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
363: {
364:   PetscErrorCode    ierr;
365:   PetscSFPack       link;
366:   const PetscInt    *rootloc = NULL,*leafloc = NULL;
367:   MPI_Request       *rootreqs = NULL,*leafreqs = NULL;

370:   PetscSFGetRootIndicesWithMemType_Basic(sf,rootmtype,&rootloc);
371:   PetscSFGetLeafIndicesWithMemType_Basic(sf,leafmtype,&leafloc);
372:   PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);
373:   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
374:   PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);
375:   PetscSFPackGetReqs_Basic(sf,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);

377:   /* Post leaf receives */
378:   MPI_Startall_irecv(link->leafbuflen,unit,link->nleafreqs,leafreqs);

380:   /* Process local fetch-and-op, post root sends */
381:   PetscSFFetchAndOpRootData(sf,link,rootloc,rootdata,op,PETSC_TRUE);
382:   MPI_Startall_isend(link->rootbuflen,unit,link->nrootreqs,rootreqs);
383:   if (rootmtype != leafmtype) {PetscMemcpyWithMemType(leafmtype,rootmtype,link->selfbuf[leafmtype],link->selfbuf[rootmtype],link->selfbuflen*link->unitbytes);}

385:   /* Unpack and insert fetched data into leaves */
386:   PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);
387:   PetscSFUnpackAndOpLeafData(sf,link,leafloc,leafupdate,MPIU_REPLACE,PETSC_TRUE);
388:   PetscSFPackReclaim(sf,&link);
389:   return(0);
390: }
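
A hedged usage sketch of the fetch-and-op pair implemented above: each leaf contributes to its root with op, and leafupdate receives the root's state immediately before that contribution was applied (buffer names are illustrative):

  ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,rootdata,leafdata,leafupdate,MPI_SUM);CHKERRQ(ierr);
  ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,rootdata,leafdata,leafupdate,MPI_SUM);CHKERRQ(ierr);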

392: PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
393: {
394:   PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;

397:   if (niranks)  *niranks  = bas->niranks;
398:   if (iranks)   *iranks   = bas->iranks;
399:   if (ioffset)  *ioffset  = bas->ioffset;
400:   if (irootloc) *irootloc = bas->irootloc;
401:   return(0);
402: }

404: /* An optimized PetscSFCreateEmbeddedSF. We aggressively reuse the communication already established on sf:
405:    we need one bcast on sf, and no further communication, to build the embedded sf. Note that selected[]
406:    has been sorted before this routine is called.
407:  */
408: PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
409: {
410:   PetscSF           esf;
411:   PetscInt          esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote,count;
412:   PetscInt          i,j,k,p,q,nroots,*rootdata,*leafdata,connected_leaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal;
413:   PetscMPIInt       *esf_ranks;
414:   const PetscMPIInt *ranks,*iranks;
415:   const PetscInt    *roffset,*rmine,*rremote,*ioffset,*irootloc,*buffer;
416:   PetscBool         connected;
417:   PetscSFPack       link;
418:   PetscSFNode       *new_iremote;
419:   PetscSF_Basic     *bas;
420:   PetscErrorCode    ierr;

423:   PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);
424:   PetscSFSetType(esf,PETSCSFBASIC); /* This optimized routine can only create a basic sf */

426:   /* Find out which leaves are still connected to roots in the embedded sf */
427:   PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
428:   PetscSFGetLeafRange(sf,&minleaf,&maxleaf);
429:   /* We abuse the term leafdata here: its size is usually the number of leaf data items, but here it is the number of leaves (always >= the number of leaf data items) */
430:   maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
431:   PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);
432:   /* Tag selected roots */
433:   for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1;

435:   /* Bcast from roots to leaves to tag connected leaves. We reuse the established bcast communication in
436:      sf but skip the unpacking (from the leaf buffer to leafdata); the raw data in the leaf buffer is what
437:      we are interested in, since it tells which leaves are connected to which ranks.
438:    */
439:   PetscSFBcastAndOpBegin_Basic(sf,MPIU_INT,PETSC_MEMTYPE_HOST,rootdata,PETSC_MEMTYPE_HOST,leafdata-minleaf,MPIU_REPLACE); /* Need to give leafdata but we won't use it */
440:   PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,&link);
441:   PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);
442:   PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote); /* Get send info */
443:   esf_nranks = esf_ndranks = connected_leaves = 0;
444:   for (i=0; i<nranks; i++) {
445:     connected = PETSC_FALSE; /* Is the current process still connected to this remote root rank? */
446:     buffer    = i < ndranks? (PetscInt*)link->selfbuf[PETSC_MEMTYPE_HOST] : (PetscInt*)link->leafbuf[PETSC_MEMTYPE_HOST] + (roffset[i] - roffset[ndranks]);
447:     count     = roffset[i+1] - roffset[i];
448:     for (j=0; j<count; j++) {if (buffer[j]) {connected_leaves++; connected = PETSC_TRUE;}}
449:     if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;}
450:   }

452:   /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
453:   PetscMalloc1(connected_leaves,&new_ilocal);
454:   PetscMalloc1(connected_leaves,&new_iremote);
455:   PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,connected_leaves,&esf_rmine,connected_leaves,&esf_rremote);
456:   p    = 0; /* Counter for connected root ranks */
457:   q    = 0; /* Counter for connected leaves */
458:   esf_roffset[0] = 0;
459:   for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */
460:     buffer    = i < ndranks? (PetscInt*)link->selfbuf[PETSC_MEMTYPE_HOST] : (PetscInt*)link->leafbuf[PETSC_MEMTYPE_HOST] + (roffset[i] - roffset[ndranks]);
461:     connected = PETSC_FALSE;
462:     for (j=roffset[i],k=0; j<roffset[i+1]; j++,k++) {
463:       if (buffer[k]) {
464:         esf_rmine[q]         = new_ilocal[q] = rmine[j];
465:         esf_rremote[q]       = rremote[j];
466:         new_iremote[q].index = rremote[j];
467:         new_iremote[q].rank  = ranks[i];
468:         connected            = PETSC_TRUE;
469:         q++;
470:       }
471:     }
472:     if (connected) {
473:       esf_ranks[p]     = ranks[i];
474:       esf_roffset[p+1] = q;
475:       p++;
476:     }
477:   }

479:   PetscSFPackReclaim(sf,&link);

481:   /* SetGraph internally resets the SF, so we only set its fields after the call */
482:   PetscSFSetGraph(esf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
483:   esf->nranks  = esf_nranks;
484:   esf->ndranks = esf_ndranks;
485:   esf->ranks   = esf_ranks;
486:   esf->roffset = esf_roffset;
487:   esf->rmine   = esf_rmine;
488:   esf->rremote = esf_rremote;

490:   /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
491:   bas  = (PetscSF_Basic*)esf->data;
492:   PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc); /* Get recv info */
493:   /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here,
494:      but we expect these arrays to be short, so the waste is acceptable. The benefit is that we can fill them by parsing
495:      irootloc just once. */
496:   PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);
497:   PetscMalloc1(ioffset[niranks],&bas->irootloc);
498:   bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
499:   p = 0; /* Counter for connected leaf ranks */
500:   q = 0; /* Counter for connected roots */
501:   for (i=0; i<niranks; i++) {
502:     connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
503:     for (j=ioffset[i]; j<ioffset[i+1]; j++) {
504:       PetscInt loc;
505:       PetscFindInt(irootloc[j],nselected,selected,&loc);
506:       if (loc >= 0) { /* Found in selected[], so this root is connected */
507:         bas->irootloc[q++] = irootloc[j];
508:         connected = PETSC_TRUE;
509:       }
510:     }
511:     if (connected) {
512:       bas->niranks++;
513:       if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
514:       bas->iranks[p]    = iranks[i];
515:       bas->ioffset[p+1] = q;
516:       p++;
517:     }
518:   }
519:   bas->itotal = q;

521:   /* Setup packing optimizations */
522:   PetscSFPackSetupOptimizations_Basic(esf);
523:   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */

525:   PetscFree2(rootdata,leafdata);
526:   *newsf = esf;
527:   return(0);
528: }
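
A hedged usage sketch of the routine above through its public entry point PetscSFCreateEmbeddedSF; as the leading comment notes, selected[] must be sorted (the indices below are illustrative):

  PetscInt selected[] = {0,2,5};   /* sorted local indices of the roots to keep */
  PetscSF  esf;

  ierr = PetscSFCreateEmbeddedSF(sf,3,selected,&esf);CHKERRQ(ierr);
  /* ... communicate on the reduced graph ... */
  ierr = PetscSFDestroy(&esf);CHKERRQ(ierr);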

530: PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedLeafSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
531: {
532:   PetscSF           esf;
533:   PetscInt          i,j,k,p,q,nroots,*rootdata,*leafdata,*new_ilocal,niranks,ndiranks,minleaf,maxleaf,maxlocal;
534:   const PetscInt    *ilocal,*ioffset,*irootloc,*buffer;
535:   const PetscMPIInt *iranks;
536:   PetscSFPack       link;
537:   PetscSFNode       *new_iremote;
538:   const PetscSFNode *iremote;
539:   PetscSF_Basic     *bas;
540:   MPI_Group         group;
541:   PetscErrorCode    ierr;

544:   PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);
545:   PetscSFSetType(esf,PETSCSFBASIC); /* This optimized routine can only create a basic sf */

547:   /* Set the graph of esf, which is easy for CreateEmbeddedLeafSF */
548:   PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);
549:   PetscSFGetLeafRange(sf,&minleaf,&maxleaf);
550:   PetscMalloc1(nselected,&new_ilocal);
551:   PetscMalloc1(nselected,&new_iremote);
552:   for (i=0; i<nselected; i++) {
553:     const PetscInt l     = selected[i];
554:     new_ilocal[i]        = ilocal ? ilocal[l] : l;
555:     new_iremote[i].rank  = iremote[l].rank;
556:     new_iremote[i].index = iremote[l].index;
557:   }

559:   /* Tag selected leaves before PetscSFSetGraph, since new_ilocal might become NULL after that call because we pass PETSC_OWN_POINTER below */
560:   maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
561:   PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);
562:   for (i=0; i<nselected; i++) leafdata[new_ilocal[i]-minleaf] = 1; /* -minleaf to adjust indices according to minleaf */

564:   PetscSFSetGraph(esf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);

566:   /* Set up the outgoing communication (i.e., send info). We cannot reuse rmine etc. from sf since there is no way to
567:      map rmine[i] (ilocal of leaves) back to selected[j] (leaf indices).
568:    */
569:   MPI_Comm_group(PETSC_COMM_SELF,&group);
570:   PetscSFSetUpRanks(esf,group);
571:   MPI_Group_free(&group);

573:   /* Set up the incoming communication (i.e., recv info) */
574:   PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);
575:   bas  = (PetscSF_Basic*)esf->data;
576:   PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);
577:   PetscMalloc1(ioffset[niranks],&bas->irootloc);

579:   /* Pass info about selected leaves to root buffer */
580:   PetscSFReduceBegin_Basic(sf,MPIU_INT,PETSC_MEMTYPE_HOST,leafdata-minleaf,PETSC_MEMTYPE_HOST,rootdata,MPIU_REPLACE); /* -minleaf to re-adjust start address of leafdata */
581:   PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,&link);
582:   PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);

584:   bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
585:   p = 0; /* Counter for connected leaf ranks */
586:   q = 0; /* Counter for connected roots */
587:   for (i=0; i<niranks; i++) {
588:     PetscBool connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
589:     buffer = i < ndiranks? (PetscInt*)link->selfbuf[PETSC_MEMTYPE_HOST] : (PetscInt*)link->rootbuf[PETSC_MEMTYPE_HOST] + (ioffset[i] - ioffset[ndiranks]);
590:     for (j=ioffset[i],k=0; j<ioffset[i+1]; j++,k++) {
591:       if (buffer[k]) {bas->irootloc[q++] = irootloc[j]; connected = PETSC_TRUE;}
592:     }
593:     if (connected) {
594:       bas->niranks++;
595:       if (i<ndiranks) bas->ndiranks++;
596:       bas->iranks[p]    = iranks[i];
597:       bas->ioffset[p+1] = q;
598:       p++;
599:     }
600:   }
601:   bas->itotal = q;
602:   PetscSFPackReclaim(sf,&link);

604:   /* Setup packing optimizations */
605:   PetscSFPackSetupOptimizations_Basic(esf);
606:   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */

608:   PetscFree2(rootdata,leafdata);
609:   *newsf = esf;
610:   return(0);
611: }

613: PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
614: {
615:   PetscSF_Basic  *dat;

619:   sf->ops->SetUp                = PetscSFSetUp_Basic;
620:   sf->ops->SetFromOptions       = PetscSFSetFromOptions_Basic;
621:   sf->ops->Reset                = PetscSFReset_Basic;
622:   sf->ops->Destroy              = PetscSFDestroy_Basic;
623:   sf->ops->View                 = PetscSFView_Basic;
624:   sf->ops->BcastAndOpBegin      = PetscSFBcastAndOpBegin_Basic;
625:   sf->ops->BcastAndOpEnd        = PetscSFBcastAndOpEnd_Basic;
626:   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
627:   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
628:   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
629:   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
630:   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
631:   sf->ops->CreateEmbeddedSF     = PetscSFCreateEmbeddedSF_Basic;
632:   sf->ops->CreateEmbeddedLeafSF = PetscSFCreateEmbeddedLeafSF_Basic;

634:   PetscNewLog(sf,&dat);
635:   sf->data = (void*)dat;
636:   return(0);
637: }
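
Registering these function pointers is what lets an application select this implementation. A hedged sketch of typical use, where the graph arguments (nroots, nleaves, ilocal, iremote) are placeholders:

  ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);  /* or -sf_type basic from the options database */
  ierr = PetscSFSetGraph(sf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);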