Actual source code: sf.c

petsc-3.12.5 2020-03-29
  1:  #include <petsc/private/sfimpl.h>
  2:  #include <petsc/private/hashseti.h>
  3:  #include <petscctable.h>

  5: #if defined(PETSC_USE_DEBUG)
  6: #  define PetscSFCheckGraphSet(sf,arg) do {                          \
  7:     if (PetscUnlikely(!(sf)->graphset))                              \
  8:       SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  9:   } while (0)
 10: #else
 11: #  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
 12: #endif

 14: const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};

 16: /*@
 17:    PetscSFCreate - create a star forest communication context

 19:    Collective

 21:    Input Arguments:
 22: .  comm - communicator on which the star forest will operate

 24:    Output Arguments:
 25: .  sf - new star forest context

 27:    Options Database Keys:
 28: +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
 29: .  -sf_type window    -Use MPI-3 one-sided window for communication
 30: -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication

 32:    Level: intermediate

 34:    Notes:
 35:    When the communication graph matches one of the predefined patterns, such as MPI_Alltoall, MPI_Allgatherv,
 36:    or MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
 37:    SFs are optimized and have better performance than general SFs.

 39: .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
 40: @*/
 41: PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
 42: {
 44:   PetscSF        b;

 48:   PetscSFInitializePackage();

 50:   PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);

 52:   b->nroots    = -1;
 53:   b->nleaves   = -1;
 54:   b->minleaf   = PETSC_MAX_INT;
 55:   b->maxleaf   = PETSC_MIN_INT;
 56:   b->nranks    = -1;
 57:   b->rankorder = PETSC_TRUE;
 58:   b->ingroup   = MPI_GROUP_NULL;
 59:   b->outgroup  = MPI_GROUP_NULL;
 60:   b->graphset  = PETSC_FALSE;

 62:   *sf = b;
 63:   return(0);
 64: }
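
/*
   Illustrative usage sketch (not part of sf.c): create an SF on two ranks where each
   rank owns one root and attaches one leaf to the other rank's root, then broadcast
   root values to the leaves.  The names below are hypothetical and CHKERRQ() error
   checking is omitted for brevity.

     PetscSF     sf;
     PetscSFNode remote;
     PetscMPIInt rank,size;
     PetscInt    rootdata,leafdata;

     MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
     MPI_Comm_size(PETSC_COMM_WORLD,&size);             // assumes exactly two ranks
     remote.rank  = (rank+1)%size;                      // my single leaf references ...
     remote.index = 0;                                  // ... root 0 on the other rank
     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     PetscSFSetFromOptions(sf);
     PetscSFSetGraph(sf,1,1,NULL,PETSC_COPY_VALUES,&remote,PETSC_COPY_VALUES);
     rootdata = 100+rank;
     PetscSFBcastBegin(sf,MPIU_INT,&rootdata,&leafdata);
     PetscSFBcastEnd(sf,MPIU_INT,&rootdata,&leafdata);  // leafdata == 100+(rank+1)%size
     PetscSFDestroy(&sf);
*/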

 66: /*@
 67:    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

 69:    Collective

 71:    Input Arguments:
 72: .  sf - star forest

 74:    Level: advanced

 76: .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
 77: @*/
 78: PetscErrorCode PetscSFReset(PetscSF sf)
 79: {

 84:   if (sf->ops->Reset) {(*sf->ops->Reset)(sf);}
 85:   sf->nroots   = -1;
 86:   sf->nleaves  = -1;
 87:   sf->minleaf  = PETSC_MAX_INT;
 88:   sf->maxleaf  = PETSC_MIN_INT;
 89:   sf->mine     = NULL;
 90:   sf->remote   = NULL;
 91:   sf->graphset = PETSC_FALSE;
 92:   PetscFree(sf->mine_alloc);
 93:   PetscFree(sf->remote_alloc);
 94:   sf->nranks = -1;
 95:   PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);
 96: #if defined(PETSC_HAVE_CUDA)
 97:   if (sf->rmine_d) {cudaError_t err = cudaFree(sf->rmine_d);CHKERRCUDA(err);sf->rmine_d=NULL;}
 98: #endif
 99:   sf->degreeknown = PETSC_FALSE;
100:   PetscFree(sf->degree);
101:   if (sf->ingroup  != MPI_GROUP_NULL) {MPI_Group_free(&sf->ingroup);}
102:   if (sf->outgroup != MPI_GROUP_NULL) {MPI_Group_free(&sf->outgroup);}
103:   PetscSFDestroy(&sf->multi);
104:   PetscLayoutDestroy(&sf->map);
105:   sf->setupcalled = PETSC_FALSE;
106:   return(0);
107: }

109: /*@C
110:    PetscSFSetType - Set the PetscSF communication implementation

112:    Collective on PetscSF

114:    Input Parameters:
115: +  sf - the PetscSF context
116: -  type - a known method

118:    Options Database Key:
119: .  -sf_type <type> - Sets the method; use -help for a list
120:    of available methods (for instance, basic, window, neighbor)

122:    Notes:
123:    See "include/petscsf.h" for available methods, for instance:
124: +    PETSCSFWINDOW - MPI-2/3 one-sided
125: -    PETSCSFBASIC - basic implementation using MPI-1 two-sided

127:   Level: intermediate

129: .seealso: PetscSFType, PetscSFCreate()
130: @*/
131: PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
132: {
133:   PetscErrorCode ierr,(*r)(PetscSF);
134:   PetscBool      match;


140:   PetscObjectTypeCompare((PetscObject)sf,type,&match);
141:   if (match) return(0);

143:   PetscFunctionListFind(PetscSFList,type,&r);
144:   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
145:   /* Destroy the previous PetscSF implementation context */
146:   if (sf->ops->Destroy) {(*(sf)->ops->Destroy)(sf);}
147:   PetscMemzero(sf->ops,sizeof(*sf->ops));
148:   PetscObjectChangeTypeName((PetscObject)sf,type);
149:   (*r)(sf);
150:   return(0);
151: }

153: /*@C
154:   PetscSFGetType - Get the PetscSF communication implementation

156:   Not Collective

158:   Input Parameter:
159: . sf  - the PetscSF context

161:   Output Parameter:
162: . type - the PetscSF type name

164:   Level: intermediate

166: .seealso: PetscSFSetType(), PetscSFCreate()
167: @*/
168: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
169: {
173:   *type = ((PetscObject)sf)->type_name;
174:   return(0);
175: }

177: /*@
178:    PetscSFDestroy - destroy star forest

180:    Collective

182:    Input Arguments:
183: .  sf - address of star forest

185:    Level: intermediate

187: .seealso: PetscSFCreate(), PetscSFReset()
188: @*/
189: PetscErrorCode PetscSFDestroy(PetscSF *sf)
190: {

194:   if (!*sf) return(0);
196:   if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; return(0);}
197:   PetscSFReset(*sf);
198:   if ((*sf)->ops->Destroy) {(*(*sf)->ops->Destroy)(*sf);}
199:   PetscHeaderDestroy(sf);
200:   return(0);
201: }

203: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
204: {
205: #if defined(PETSC_USE_DEBUG)
206:   PetscInt           i, nleaves;
207:   PetscMPIInt        size;
208:   const PetscInt    *ilocal;
209:   const PetscSFNode *iremote;
210:   PetscErrorCode     ierr;

213:   if (!sf->graphset) return(0);
214:   PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);
215:   MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
216:   for (i = 0; i < nleaves; i++) {
217:     const PetscInt rank = iremote[i].rank;
218:     const PetscInt remote = iremote[i].index;
219:     const PetscInt leaf = ilocal ? ilocal[i] : i;
220:     if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
221:     if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
222:     if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
223:   }
224:   return(0);
225: #else
227:   return(0);
228: #endif
229: }

231: /*@
232:    PetscSFSetUp - set up communication structures

234:    Collective

236:    Input Arguments:
237: .  sf - star forest communication object

239:    Level: beginner

241: .seealso: PetscSFSetFromOptions(), PetscSFSetType()
242: @*/
243: PetscErrorCode PetscSFSetUp(PetscSF sf)
244: {

249:   PetscSFCheckGraphSet(sf,1);
250:   if (sf->setupcalled) return(0);
251:   PetscSFCheckGraphValid_Private(sf);
252:   if (!((PetscObject)sf)->type_name) {PetscSFSetType(sf,PETSCSFBASIC);}
253:   PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);
254:   if (sf->ops->SetUp) {(*sf->ops->SetUp)(sf);}
255:   PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);
256:   sf->setupcalled = PETSC_TRUE;
257:   return(0);
258: }

260: /*@
261:    PetscSFSetFromOptions - set PetscSF options using the options database

263:    Logically Collective

265:    Input Arguments:
266: .  sf - star forest

268:    Options Database Keys:
269: +  -sf_type - implementation type, see PetscSFSetType()
270: -  -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise

272:    Level: intermediate

274: .seealso: PetscSFWindowSetSyncType()
275: @*/
276: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
277: {
278:   PetscSFType    deft;
279:   char           type[256];
281:   PetscBool      flg;

285:   PetscObjectOptionsBegin((PetscObject)sf);
286:   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
287:   PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);
288:   PetscSFSetType(sf,flg ? type : deft);
289:   PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);
290:   if (sf->ops->SetFromOptions) {(*sf->ops->SetFromOptions)(PetscOptionsObject,sf);}
291:   PetscOptionsEnd();
292:   return(0);
293: }

295: /*@
296:    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

298:    Logically Collective

300:    Input Arguments:
301: +  sf - star forest
302: -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

304:    Level: advanced

306: .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
307: @*/
308: PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
309: {

314:   if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
315:   sf->rankorder = flg;
316:   return(0);
317: }

319: /*@
320:    PetscSFSetGraph - Set a parallel star forest

322:    Collective

324:    Input Arguments:
325: +  sf - star forest
326: .  nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
327: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
328: .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
329: during setup in debug mode)
330: .  localmode - copy mode for ilocal
331: .  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
332: during setup in debug mode)
333: -  remotemode - copy mode for iremote

335:    Level: intermediate

337:    Notes:
338:     In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

340:    Developers Note: Local indices that are the identity permutation in the range [0,nleaves) are discarded, as they
341:    encode contiguous storage. In that case, if localmode is PETSC_OWN_POINTER, the memory is deallocated since it is not
342:    needed.

344:    Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
345:    indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

347: .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
348: @*/
349: PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
350: {

357:   if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
358:   if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

360:   PetscSFReset(sf);
361:   PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);

363:   sf->nroots  = nroots;
364:   sf->nleaves = nleaves;

366:   if (nleaves && ilocal) {
367:     PetscInt i;
368:     PetscInt minleaf = PETSC_MAX_INT;
369:     PetscInt maxleaf = PETSC_MIN_INT;
370:     int      contiguous = 1;
371:     for (i=0; i<nleaves; i++) {
372:       minleaf = PetscMin(minleaf,ilocal[i]);
373:       maxleaf = PetscMax(maxleaf,ilocal[i]);
374:       contiguous &= (ilocal[i] == i);
375:     }
376:     sf->minleaf = minleaf;
377:     sf->maxleaf = maxleaf;
378:     if (contiguous) {
379:       if (localmode == PETSC_OWN_POINTER) {
380:         PetscFree(ilocal);
381:       }
382:       ilocal = NULL;
383:     }
384:   } else {
385:     sf->minleaf = 0;
386:     sf->maxleaf = nleaves - 1;
387:   }

389:   if (ilocal) {
390:     switch (localmode) {
391:     case PETSC_COPY_VALUES:
392:       PetscMalloc1(nleaves,&sf->mine_alloc);
393:       PetscArraycpy(sf->mine_alloc,ilocal,nleaves);
394:       sf->mine = sf->mine_alloc;
395:       break;
396:     case PETSC_OWN_POINTER:
397:       sf->mine_alloc = (PetscInt*)ilocal;
398:       sf->mine       = sf->mine_alloc;
399:       break;
400:     case PETSC_USE_POINTER:
401:       sf->mine_alloc = NULL;
402:       sf->mine       = (PetscInt*)ilocal;
403:       break;
404:     default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
405:     }
406:   }

408:   switch (remotemode) {
409:   case PETSC_COPY_VALUES:
410:     PetscMalloc1(nleaves,&sf->remote_alloc);
411:     PetscArraycpy(sf->remote_alloc,iremote,nleaves);
412:     sf->remote = sf->remote_alloc;
413:     break;
414:   case PETSC_OWN_POINTER:
415:     sf->remote_alloc = (PetscSFNode*)iremote;
416:     sf->remote       = sf->remote_alloc;
417:     break;
418:   case PETSC_USE_POINTER:
419:     sf->remote_alloc = NULL;
420:     sf->remote       = (PetscSFNode*)iremote;
421:     break;
422:   default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
423:   }

425:   PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);
426:   sf->graphset = PETSC_TRUE;
427:   return(0);
428: }
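
/*
   Illustrative sketch (not part of sf.c): a rank contributes two roots and two leaves;
   the leaves live at locations 3 and 5 of a (hypothetical) leaf buffer of length at
   least 6 and both reference roots owned by rank 0.  The index arrays are given to the
   SF with PETSC_OWN_POINTER, so the SF takes over and frees them.  CHKERRQ() checks
   are omitted.

     PetscSF      sf;
     PetscInt    *ilocal;
     PetscSFNode *iremote;

     PetscMalloc1(2,&ilocal);
     PetscMalloc1(2,&iremote);
     ilocal[0] = 3;  iremote[0].rank = 0;  iremote[0].index = 0;
     ilocal[1] = 5;  iremote[1].rank = 0;  iremote[1].index = 1;
     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     PetscSFSetGraph(sf,2,2,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);
     PetscSFSetUp(sf);
     PetscSFDestroy(&sf);
*/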

430: /*@
431:   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

433:   Collective

435:   Input Parameters:
436: + sf      - The PetscSF
437: . map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
438: - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

440:   Notes:
441:   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
442:   n and N are local and global sizes of x respectively.

444:   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph such that a Bcast on it copies x to
445:   sequential vectors y on all ranks.

447:   With PETSCSF_PATTERN_GATHER, the routine creates a graph such that a Bcast on it copies x to a
448:   sequential vector y on rank 0.

450:   In the above cases, entries of x are roots and entries of y are leaves.

452:   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is the size of sf's communicator. The routine
453:   creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
454:   of rank j, where 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount equal to 1. This does
455:   not mean one cannot send multiple items; one just needs to create a new MPI datatype for the multiple data
456:   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

458:   In this case, roots and leaves are symmetric.

460:   Level: intermediate
461:  @*/
462: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
463: {
464:   MPI_Comm       comm;
465:   PetscInt       n,N,res[2];
466:   PetscMPIInt    rank,size;
467:   PetscSFType    type;

471:   PetscObjectGetComm((PetscObject)sf, &comm);
472:   if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
473:   MPI_Comm_rank(comm,&rank);
474:   MPI_Comm_size(comm,&size);

476:   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
477:     type = PETSCSFALLTOALL;
478:     PetscLayoutCreate(comm,&sf->map);
479:     PetscLayoutSetLocalSize(sf->map,size);
480:     PetscLayoutSetSize(sf->map,((PetscInt)size)*size);
481:     PetscLayoutSetUp(sf->map);
482:   } else {
483:     PetscLayoutGetLocalSize(map,&n);
484:     PetscLayoutGetSize(map,&N);
485:     res[0] = n;
486:     res[1] = -n;
487:     /* Check whether n is the same over all ranks so that we can optimize it */
488:     MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);
489:     if (res[0] == -res[1]) { /* same n */
490:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
491:     } else {
492:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
493:     }
494:     PetscLayoutReference(map,&sf->map);
495:   }
496:   PetscSFSetType(sf,type);

498:   sf->pattern = pattern;
499:   sf->mine    = NULL; /* Contiguous */

501:   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
502:      Also set other easy stuff.
503:    */
504:   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
505:     sf->nleaves      = N;
506:     sf->nroots       = n;
507:     sf->nranks       = size;
508:     sf->minleaf      = 0;
509:     sf->maxleaf      = N - 1;
510:   } else if (pattern == PETSCSF_PATTERN_GATHER) {
511:     sf->nleaves      = rank ? 0 : N;
512:     sf->nroots       = n;
513:     sf->nranks       = rank ? 0 : size;
514:     sf->minleaf      = 0;
515:     sf->maxleaf      = rank ? -1 : N - 1;
516:   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
517:     sf->nleaves      = size;
518:     sf->nroots       = size;
519:     sf->nranks       = size;
520:     sf->minleaf      = 0;
521:     sf->maxleaf      = size - 1;
522:   }
523:   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
524:   sf->graphset = PETSC_TRUE;
525:   return(0);
526: }
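
/*
   Illustrative sketch (not part of sf.c): an ALLGATHER-patterned SF.  A Bcast on it
   behaves like MPI_Allgatherv, copying each rank's n root values into a leaf buffer of
   global length N on every rank.  The names are hypothetical; CHKERRQ() checks are
   omitted.

     PetscSF     sf;
     PetscLayout map;
     PetscInt    n = 2;                       // local number of roots on this rank

     PetscLayoutCreate(PETSC_COMM_WORLD,&map);
     PetscLayoutSetLocalSize(map,n);
     PetscLayoutSetUp(map);
     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     PetscSFSetGraphWithPattern(sf,map,PETSCSF_PATTERN_ALLGATHER);
     // PetscSFBcastBegin/End(sf,MPIU_SCALAR,rootdata,leafdata) now gathers the local
     // rootdata from every rank into each rank's length-N leafdata
     PetscLayoutDestroy(&map);                // the SF keeps its own reference
     PetscSFDestroy(&sf);
*/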

528: /*@
529:    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

531:    Collective

533:    Input Arguments:

535: .  sf - star forest to invert

537:    Output Arguments:
538: .  isf - inverse of sf
539:    Level: advanced

541:    Notes:
542:    All roots must have degree 1.

544:    The local space may be a permutation, but cannot be sparse.

546: .seealso: PetscSFSetGraph()
547: @*/
548: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
549: {
551:   PetscMPIInt    rank;
552:   PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
553:   const PetscInt *ilocal;
554:   PetscSFNode    *roots,*leaves;

558:   PetscSFCheckGraphSet(sf,1);

561:   PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);
562:   maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

564:   MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
565:   PetscMalloc2(nroots,&roots,maxlocal,&leaves);
566:   for (i=0; i<maxlocal; i++) {
567:     leaves[i].rank  = rank;
568:     leaves[i].index = i;
569:   }
570:   for (i=0; i <nroots; i++) {
571:     roots[i].rank  = -1;
572:     roots[i].index = -1;
573:   }
574:   PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);
575:   PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);

577:   /* Check whether our leaves are sparse */
578:   for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
579:   if (count == nroots) newilocal = NULL;
580:   else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
581:     PetscMalloc1(count,&newilocal);
582:     for (i=0,count=0; i<nroots; i++) {
583:       if (roots[i].rank >= 0) {
584:         newilocal[count]   = i;
585:         roots[count].rank  = roots[i].rank;
586:         roots[count].index = roots[i].index;
587:         count++;
588:       }
589:     }
590:   }

592:   PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);
593:   PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);
594:   PetscFree2(roots,leaves);
595:   return(0);
596: }

598: /*@
599:    PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

601:    Collective

603:    Input Arguments:
604: +  sf - communication object to duplicate
605: -  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

607:    Output Arguments:
608: .  newsf - new communication object

610:    Level: beginner

612: .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
613: @*/
614: PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
615: {
616:   PetscSFType    type;

623:   PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);
624:   PetscSFGetType(sf,&type);
625:   if (type) {PetscSFSetType(*newsf,type);}
626:   if (opt == PETSCSF_DUPLICATE_GRAPH) {
627:     PetscSFCheckGraphSet(sf,1);
628:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
629:       PetscInt          nroots,nleaves;
630:       const PetscInt    *ilocal;
631:       const PetscSFNode *iremote;
632:       PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
633:       PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);
634:     } else {
635:       PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);
636:     }
637:   }
638:   if (sf->ops->Duplicate) {(*sf->ops->Duplicate)(sf,opt,*newsf);}
639:   return(0);
640: }
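
/*
   Illustrative sketch (not part of sf.c): duplicate an SF together with its graph in
   order to try a different communication type on the same connectivity.  Assumes the
   MPI implementation supports one-sided communication; CHKERRQ() checks are omitted.

     PetscSF sfdup;

     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_GRAPH,&sfdup);
     PetscSFSetType(sfdup,PETSCSFWINDOW);
     PetscSFSetUp(sfdup);
     // ... communicate with sfdup exactly as with sf ...
     PetscSFDestroy(&sfdup);
*/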

642: /*@C
643:    PetscSFGetGraph - Get the graph specifying a parallel star forest

645:    Not Collective

647:    Input Arguments:
648: .  sf - star forest

650:    Output Arguments:
651: +  nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
652: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
653: .  ilocal - locations of leaves in leafdata buffers
654: -  iremote - remote locations of root vertices for each leaf on the current process

656:    Notes:
657:    We are not currently requiring that the graph be set, thus nroots is returned as -1 if it has not been set yet

659:    When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
660:    want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.

662:    Level: intermediate

664: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
665: @*/
666: PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
667: {

672:   if (sf->ops->GetGraph) {
673:     (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);
674:   } else {
675:     if (nroots) *nroots = sf->nroots;
676:     if (nleaves) *nleaves = sf->nleaves;
677:     if (ilocal) *ilocal = sf->mine;
678:     if (iremote) *iremote = sf->remote;
679:   }
680:   return(0);
681: }
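
/*
   Illustrative sketch (not part of sf.c): inspect the graph of an existing SF, printing
   one line per leaf edge.  A NULL ilocal means the leaves are stored contiguously.
   CHKERRQ() checks are omitted.

     PetscInt           nroots,nleaves,i;
     const PetscInt    *ilocal;
     const PetscSFNode *iremote;

     PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
     for (i=0; i<nleaves; i++) {
       PetscPrintf(PETSC_COMM_SELF,"leaf at location %D -> (rank %D, root %D)\n",
                   ilocal ? ilocal[i] : i,iremote[i].rank,iremote[i].index);
     }
*/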

683: /*@
684:    PetscSFGetLeafRange - Get the active leaf ranges

686:    Not Collective

688:    Input Arguments:
689: .  sf - star forest

691:    Output Arguments:
692: +  minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
693: -  maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.

695:    Level: developer

697: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
698: @*/
699: PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
700: {

704:   PetscSFCheckGraphSet(sf,1);
705:   if (minleaf) *minleaf = sf->minleaf;
706:   if (maxleaf) *maxleaf = sf->maxleaf;
707:   return(0);
708: }

710: /*@C
711:    PetscSFView - view a star forest

713:    Collective

715:    Input Arguments:
716: +  sf - star forest
717: -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

719:    Level: beginner

721: .seealso: PetscSFCreate(), PetscSFSetGraph()
722: @*/
723: PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
724: {
725:   PetscErrorCode    ierr;
726:   PetscBool         iascii;
727:   PetscViewerFormat format;

731:   if (!viewer) {PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);}
734:   if (sf->graphset) {PetscSFSetUp(sf);}
735:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
736:   if (iascii) {
737:     PetscMPIInt rank;
738:     PetscInt    ii,i,j;

740:     PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);
741:     PetscViewerASCIIPushTab(viewer);
742:     if (sf->ops->View) {(*sf->ops->View)(sf,viewer);}
743:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
744:       if (!sf->graphset) {
745:         PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");
746:         PetscViewerASCIIPopTab(viewer);
747:         return(0);
748:       }
749:       MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
750:       PetscViewerASCIIPushSynchronized(viewer);
751:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);
752:       for (i=0; i<sf->nleaves; i++) {
753:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);
754:       }
755:       PetscViewerFlush(viewer);
756:       PetscViewerGetFormat(viewer,&format);
757:       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
758:         PetscMPIInt *tmpranks,*perm;
759:         PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);
760:         PetscArraycpy(tmpranks,sf->ranks,sf->nranks);
761:         for (i=0; i<sf->nranks; i++) perm[i] = i;
762:         PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);
763:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);
764:         for (ii=0; ii<sf->nranks; ii++) {
765:           i = perm[ii];
766:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);
767:           for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
768:             PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);
769:           }
770:         }
771:         PetscFree2(tmpranks,perm);
772:       }
773:       PetscViewerFlush(viewer);
774:       PetscViewerASCIIPopSynchronized(viewer);
775:     }
776:     PetscViewerASCIIPopTab(viewer);
777:   }
778:   return(0);
779: }
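
/*
   Illustrative sketch (not part of sf.c): view an SF, optionally with the detailed
   per-rank edge tables printed by the PETSC_VIEWER_ASCII_INFO_DETAIL format.  CHKERRQ()
   checks are omitted.

     PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);
     PetscSFView(sf,PETSC_VIEWER_STDOUT_WORLD);
     PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);
*/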

781: /*@C
782:    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

784:    Not Collective

786:    Input Arguments:
787: .  sf - star forest

789:    Output Arguments:
790: +  nranks - number of ranks referenced by local part
791: .  ranks - array of ranks
792: .  roffset - offset in rmine/rremote for each rank (length nranks+1)
793: .  rmine - concatenated array holding local indices referencing each remote rank
794: -  rremote - concatenated array holding remote indices referenced for each remote rank

796:    Level: developer

798: .seealso: PetscSFGetLeafRanks()
799: @*/
800: PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
801: {

806:   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
807:   if (sf->ops->GetRootRanks) {
808:     (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);
809:   } else {
810:     /* The generic implementation */
811:     if (nranks)  *nranks  = sf->nranks;
812:     if (ranks)   *ranks   = sf->ranks;
813:     if (roffset) *roffset = sf->roffset;
814:     if (rmine)   *rmine   = sf->rmine;
815:     if (rremote) *rremote = sf->rremote;
816:   }
817:   return(0);
818: }

820: /*@C
821:    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

823:    Not Collective

825:    Input Arguments:
826: .  sf - star forest

828:    Output Arguments:
829: +  niranks - number of leaf ranks referencing roots on this process
830: .  iranks - array of ranks
831: .  ioffset - offset in irootloc for each rank (length niranks+1)
832: -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

834:    Level: developer

836: .seealso: PetscSFGetRootRanks()
837: @*/
838: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
839: {

844:   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
845:   if (sf->ops->GetLeafRanks) {
846:     (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);
847:   } else {
848:     PetscSFType type;
849:     PetscSFGetType(sf,&type);
850:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
851:   }
852:   return(0);
853: }

855: static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
856:   PetscInt i;
857:   for (i=0; i<n; i++) {
858:     if (needle == list[i]) return PETSC_TRUE;
859:   }
860:   return PETSC_FALSE;
861: }

863: /*@C
864:    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

866:    Collective

868:    Input Arguments:
869: +  sf - PetscSF to set up; PetscSFSetGraph() must have been called
870: -  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

872:    Level: developer

874: .seealso: PetscSFGetRootRanks()
875: @*/
876: PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
877: {
878:   PetscErrorCode     ierr;
879:   PetscTable         table;
880:   PetscTablePosition pos;
881:   PetscMPIInt        size,groupsize,*groupranks;
882:   PetscInt           *rcount,*ranks;
883:   PetscInt           i, irank = -1,orank = -1;

887:   PetscSFCheckGraphSet(sf,1);
888:   MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
889:   PetscTableCreate(10,size,&table);
890:   for (i=0; i<sf->nleaves; i++) {
891:     /* Log 1-based rank */
892:     PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);
893:   }
894:   PetscTableGetCount(table,&sf->nranks);
895:   PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);
896:   PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);
897:   PetscTableGetHeadPosition(table,&pos);
898:   for (i=0; i<sf->nranks; i++) {
899:     PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);
900:     ranks[i]--;             /* Convert back to 0-based */
901:   }
902:   PetscTableDestroy(&table);

904:   /* We expect that dgroup is reliably "small" while nranks could be large */
905:   {
906:     MPI_Group group = MPI_GROUP_NULL;
907:     PetscMPIInt *dgroupranks;
908:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
909:     MPI_Group_size(dgroup,&groupsize);
910:     PetscMalloc1(groupsize,&dgroupranks);
911:     PetscMalloc1(groupsize,&groupranks);
912:     for (i=0; i<groupsize; i++) dgroupranks[i] = i;
913:     if (groupsize) {MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);}
914:     MPI_Group_free(&group);
915:     PetscFree(dgroupranks);
916:   }

918:   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
919:   for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) {
920:     for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
921:       if (InList(ranks[i],groupsize,groupranks)) break;
922:     }
923:     for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
924:       if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
925:     }
926:     if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
927:       PetscInt    tmprank,tmpcount;

929:       tmprank             = ranks[i];
930:       tmpcount            = rcount[i];
931:       ranks[i]            = ranks[sf->ndranks];
932:       rcount[i]           = rcount[sf->ndranks];
933:       ranks[sf->ndranks]  = tmprank;
934:       rcount[sf->ndranks] = tmpcount;
935:       sf->ndranks++;
936:     }
937:   }
938:   PetscFree(groupranks);
939:   PetscSortIntWithArray(sf->ndranks,ranks,rcount);
940:   PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);
941:   sf->roffset[0] = 0;
942:   for (i=0; i<sf->nranks; i++) {
943:     PetscMPIIntCast(ranks[i],sf->ranks+i);
944:     sf->roffset[i+1] = sf->roffset[i] + rcount[i];
945:     rcount[i]        = 0;
946:   }
947:   for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
948:     /* short circuit */
949:     if (orank != sf->remote[i].rank) {
950:       /* Search for index of iremote[i].rank in sf->ranks */
951:       PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);
952:       if (irank < 0) {
953:         PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);
954:         if (irank >= 0) irank += sf->ndranks;
955:       }
956:       orank = sf->remote[i].rank;
957:     }
958:     if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
959:     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
960:     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
961:     rcount[irank]++;
962:   }
963:   PetscFree2(rcount,ranks);
964:   return(0);
965: }

967: /*@C
968:    PetscSFGetGroups - gets incoming and outgoing process groups

970:    Collective

972:    Input Argument:
973: .  sf - star forest

975:    Output Arguments:
976: +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
977: -  outgoing - group of destination processes for outgoing edges (roots that I reference)

979:    Level: developer

981: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
982: @*/
983: PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
984: {
986:   MPI_Group      group = MPI_GROUP_NULL;

989:   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining groups");
990:   if (sf->ingroup == MPI_GROUP_NULL) {
991:     PetscInt       i;
992:     const PetscInt *indegree;
993:     PetscMPIInt    rank,*outranks,*inranks;
994:     PetscSFNode    *remote;
995:     PetscSF        bgcount;

997:     /* Compute the number of incoming ranks */
998:     PetscMalloc1(sf->nranks,&remote);
999:     for (i=0; i<sf->nranks; i++) {
1000:       remote[i].rank  = sf->ranks[i];
1001:       remote[i].index = 0;
1002:     }
1003:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);
1004:     PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1005:     PetscSFComputeDegreeBegin(bgcount,&indegree);
1006:     PetscSFComputeDegreeEnd(bgcount,&indegree);

1008:     /* Enumerate the incoming ranks */
1009:     PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);
1010:     MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1011:     for (i=0; i<sf->nranks; i++) outranks[i] = rank;
1012:     PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);
1013:     PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);
1014:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1015:     MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);
1016:     MPI_Group_free(&group);
1017:     PetscFree2(inranks,outranks);
1018:     PetscSFDestroy(&bgcount);
1019:   }
1020:   *incoming = sf->ingroup;

1022:   if (sf->outgroup == MPI_GROUP_NULL) {
1023:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1024:     MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);
1025:     MPI_Group_free(&group);
1026:   }
1027:   *outgoing = sf->outgroup;
1028:   return(0);
1029: }

1031: /*@
1032:    PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

1034:    Collective

1036:    Input Argument:
1037: .  sf - star forest that may contain roots with 0 or with more than 1 vertex

1039:    Output Arguments:
1040: .  multi - star forest with split roots, such that each root has degree exactly 1

1042:    Level: developer

1044:    Notes:

1046:    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1047:    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1048:    edge, it is a candidate for future optimization that might involve its removal.

1050: .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1051: @*/
1052: PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
1053: {

1059:   if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
1060:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1061:     *multi = sf->multi;
1062:     return(0);
1063:   }
1064:   if (!sf->multi) {
1065:     const PetscInt *indegree;
1066:     PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
1067:     PetscSFNode    *remote;
1068:     maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
1069:     PetscSFComputeDegreeBegin(sf,&indegree);
1070:     PetscSFComputeDegreeEnd(sf,&indegree);
1071:     PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);
1072:     inoffset[0] = 0;
1073:     for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
1074:     for (i=0; i<maxlocal; i++) outones[i] = 1;
1075:     PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1076:     PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1077:     for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1078: #if 0
1079: #if defined(PETSC_USE_DEBUG)                                 /* Check that the expected number of increments occurred */
1080:     for (i=0; i<sf->nroots; i++) {
1081:       if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
1082:     }
1083: #endif
1084: #endif
1085:     PetscMalloc1(sf->nleaves,&remote);
1086:     for (i=0; i<sf->nleaves; i++) {
1087:       remote[i].rank  = sf->remote[i].rank;
1088:       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1089:     }
1090:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1091:     PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1092:     if (sf->rankorder) {        /* Sort the ranks */
1093:       PetscMPIInt rank;
1094:       PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
1095:       PetscSFNode *newremote;
1096:       MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1097:       for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
1098:       PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);
1099:       for (i=0; i<maxlocal; i++) outranks[i] = rank;
1100:       PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);
1101:       PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);
1102:       /* Sort the incoming ranks at each vertex, build the inverse map */
1103:       for (i=0; i<sf->nroots; i++) {
1104:         PetscInt j;
1105:         for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
1106:         PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);
1107:         for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1108:       }
1109:       PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);
1110:       PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);
1111:       PetscMalloc1(sf->nleaves,&newremote);
1112:       for (i=0; i<sf->nleaves; i++) {
1113:         newremote[i].rank  = sf->remote[i].rank;
1114:         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1115:       }
1116:       PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);
1117:       PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);
1118:     }
1119:     PetscFree3(inoffset,outones,outoffset);
1120:   }
1121:   *multi = sf->multi;
1122:   return(0);
1123: }

1125: /*@C
1126:    PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices

1128:    Collective

1130:    Input Arguments:
1131: +  sf - original star forest
1132: .  nselected  - number of selected roots on this process
1133: -  selected   - indices of the selected roots on this process

1135:    Output Arguments:
1136: .  newsf - new star forest

1138:    Level: advanced

1140:    Note:
1141:    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1142:    be done by calling PetscSFGetGraph().

1144: .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1145: @*/
1146: PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1147: {
1148:   PetscInt          i,n,*roots,*rootdata,*leafdata,nroots,nleaves,connected_leaves,*new_ilocal;
1149:   const PetscSFNode *iremote;
1150:   PetscSFNode       *new_iremote;
1151:   PetscSF           tmpsf;
1152:   MPI_Comm          comm;
1153:   PetscErrorCode    ierr;

1157:   PetscSFCheckGraphSet(sf,1);

1161:   PetscSFSetUp(sf);

1163:   /* Remove duplicates from selected[] and put the results in roots[] */
1164:   PetscObjectGetComm((PetscObject)sf,&comm);
1165:   PetscMalloc1(nselected,&roots);
1166:   PetscArraycpy(roots,selected,nselected);
1167:   PetscSortedRemoveDupsInt(&nselected,roots);
1168:   if (nselected && (roots[0] < 0 || roots[nselected-1] >= sf->nroots)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",roots[0],roots[nselected-1],sf->nroots);

1170:   if (sf->ops->CreateEmbeddedSF) {
1171:     (*sf->ops->CreateEmbeddedSF)(sf,nselected,roots,newsf);
1172:   } else {
1173:     /* A generic version of creating an embedded SF. Note that we call PetscSFSetGraph() twice, which is certainly expensive */
1174:     /* Find out which leaves (not leaf data items) are still connected to roots in the embedded sf */
1175:     PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote);
1176:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&tmpsf);
1177:     PetscSFSetGraph(tmpsf,nroots,nleaves,NULL/*contiguous*/,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);
1178:     PetscCalloc2(nroots,&rootdata,nleaves,&leafdata);
1179:     for (i=0; i<nselected; ++i) rootdata[roots[i]] = 1;
1180:     PetscSFBcastBegin(tmpsf,MPIU_INT,rootdata,leafdata);
1181:     PetscSFBcastEnd(tmpsf,MPIU_INT,rootdata,leafdata);
1182:     PetscSFDestroy(&tmpsf);

1184:     /* Build newsf with leaves that are still connected */
1185:     connected_leaves = 0;
1186:     for (i=0; i<nleaves; ++i) connected_leaves += leafdata[i];
1187:     PetscMalloc1(connected_leaves,&new_ilocal);
1188:     PetscMalloc1(connected_leaves,&new_iremote);
1189:     for (i=0, n=0; i<nleaves; ++i) {
1190:       if (leafdata[i]) {
1191:         new_ilocal[n]        = sf->mine ? sf->mine[i] : i;
1192:         new_iremote[n].rank  = sf->remote[i].rank;
1193:         new_iremote[n].index = sf->remote[i].index;
1194:         ++n;
1195:       }
1196:     }

1198:     if (n != connected_leaves) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"There is a size mismatch in the SF embedding, %d != %d",n,connected_leaves);
1199:     PetscSFCreate(comm,newsf);
1200:     PetscSFSetFromOptions(*newsf);
1201:     PetscSFSetGraph(*newsf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1202:     PetscFree2(rootdata,leafdata);
1203:   }
1204:   PetscFree(roots);
1205:   return(0);
1206: }
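
/*
   Illustrative sketch (not part of sf.c): keep only the edges whose roots are among the
   first two local roots on each rank and communicate on the smaller SF.  Root indices
   are not remapped in the embedded SF.  CHKERRQ() checks are omitted.

     PetscSF  esf;
     PetscInt selected[2] = {0,1};

     PetscSFCreateEmbeddedSF(sf,2,selected,&esf);
     // esf retains only the leaves whose root was selected on the owning rank
     PetscSFDestroy(&esf);
*/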

1208: /*@C
1209:   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices

1211:   Collective

1213:   Input Arguments:
1214: + sf - original star forest
1215: . nselected  - number of selected leaves on this process
1216: - selected   - indices of the selected leaves on this process

1218:   Output Arguments:
1219: .  newsf - new star forest

1221:   Level: advanced

1223: .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
1224: @*/
1225: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1226: {
1227:   const PetscSFNode *iremote;
1228:   PetscSFNode       *new_iremote;
1229:   const PetscInt    *ilocal;
1230:   PetscInt          i,nroots,*leaves,*new_ilocal;
1231:   MPI_Comm          comm;
1232:   PetscErrorCode    ierr;

1236:   PetscSFCheckGraphSet(sf,1);

1240:   /* Remove duplicates from selected[] and put the results in leaves[] */
1241:   PetscObjectGetComm((PetscObject)sf,&comm);
1242:   PetscMalloc1(nselected,&leaves);
1243:   PetscArraycpy(leaves,selected,nselected);
1244:   PetscSortedRemoveDupsInt(&nselected,leaves);
1245:   if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);

1247:   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1248:   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1249:     (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);
1250:   } else {
1251:     PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);
1252:     PetscMalloc1(nselected,&new_ilocal);
1253:     PetscMalloc1(nselected,&new_iremote);
1254:     for (i=0; i<nselected; ++i) {
1255:       const PetscInt l     = leaves[i];
1256:       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1257:       new_iremote[i].rank  = iremote[l].rank;
1258:       new_iremote[i].index = iremote[l].index;
1259:     }
1260:     PetscSFCreate(comm,newsf);
1261:     PetscSFSetFromOptions(*newsf);
1262:     PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1263:   }
1264:   PetscFree(leaves);
1265:   return(0);
1266: }

1268: /*@C
1269:    PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()

1271:    Collective on PetscSF

1273:    Input Arguments:
1274: +  sf - star forest on which to communicate
1275: .  unit - data type associated with each node
1276: .  rootdata - buffer to broadcast
1277: -  op - operation to use for reduction

1279:    Output Arguments:
1280: .  leafdata - buffer to be reduced with values from each leaf's respective root

1282:    Level: intermediate

1284: .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd()
1285: @*/
1286: PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1287: {
1289:   PetscMemType   rootmtype,leafmtype;

1293:   PetscSFSetUp(sf);
1294:   PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1295:   PetscGetMemType(rootdata,&rootmtype);
1296:   PetscGetMemType(leafdata,&leafmtype);
1297: #if defined(PETSC_HAVE_CUDA)
1298:   /*  Shall we assume rootdata and leafdata are ready to use (instead of being computed by some asynchronous kernels)?
1299:     To be similar to MPI, I'd like to have this assumption, since MPI does not have a concept of streams.
1300:     But currently this assumption is not enforced in PETSc. To be safe, I do the synchronization here. Otherwise, if
1301:     we did not sync now and called the Pack kernel directly on the default NULL stream (assuming PETSc objects are also
1302:     computed on it), we would have to sync the NULL stream before calling MPI routines. So it looks like a cudaDeviceSynchronize()
1303:     is inevitable. We do it now and put the pack/unpack kernels on non-NULL streams.
1304:    */
1305:   if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);}
1306: #endif
1307:   (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1308:   PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1309:   return(0);
1310: }

1312: /*@C
1313:    PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()

1315:    Collective

1317:    Input Arguments:
1318: +  sf - star forest
1319: .  unit - data type
1320: .  rootdata - buffer to broadcast
1321: -  op - operation to use for reduction

1323:    Output Arguments:
1324: .  leafdata - buffer to be reduced with values from each leaf's respective root

1326:    Level: intermediate

1328: .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1329: @*/
1330: PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1331: {
1333:   PetscMemType   rootmtype,leafmtype;

1337:   PetscSFSetUp(sf);
1338:   PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);
1339:   PetscGetMemType(rootdata,&rootmtype);
1340:   PetscGetMemType(leafdata,&leafmtype);
1341:   (*sf->ops->BcastAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1342:   PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);
1343:   return(0);
1344: }
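
/*
   Illustrative sketch (not part of sf.c): add each root value into its leaf entries
   instead of overwriting them, by broadcasting with MPI_SUM.  rootdata and leafdata are
   hypothetical, previously allocated and initialized buffers; CHKERRQ() checks are
   omitted.

     PetscSFBcastAndOpBegin(sf,MPIU_INT,rootdata,leafdata,MPI_SUM);
     // ... independent local work can overlap the communication here ...
     PetscSFBcastAndOpEnd(sf,MPIU_INT,rootdata,leafdata,MPI_SUM);
*/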

1346: /*@C
1347:    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()

1349:    Collective

1351:    Input Arguments:
1352: +  sf - star forest
1353: .  unit - data type
1354: .  leafdata - values to reduce
1355: -  op - reduction operation

1357:    Output Arguments:
1358: .  rootdata - result of reduction of values from all leaves of each root

1360:    Level: intermediate

1362: .seealso: PetscSFBcastBegin()
1363: @*/
1364: PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1365: {
1367:   PetscMemType   rootmtype,leafmtype;

1371:   PetscSFSetUp(sf);
1372:   PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);
1373:   PetscGetMemType(rootdata,&rootmtype);
1374:   PetscGetMemType(leafdata,&leafmtype);
1375: #if defined(PETSC_HAVE_CUDA)
1376:   if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);}
1377: #endif
1378:   (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1379:   PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);
1380:   return(0);
1381: }

1383: /*@C
1384:    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

1386:    Collective

1388:    Input Arguments:
1389: +  sf - star forest
1390: .  unit - data type
1391: .  leafdata - values to reduce
1392: -  op - reduction operation

1394:    Output Arguments:
1395: .  rootdata - result of reduction of values from all leaves of each root

1397:    Level: intermediate

1399: .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1400: @*/
1401: PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1402: {
1404:   PetscMemType   rootmtype,leafmtype;

1408:   PetscSFSetUp(sf);
1409:   PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);
1410:   PetscGetMemType(rootdata,&rootmtype);
1411:   PetscGetMemType(leafdata,&leafmtype);
1412:   (*sf->ops->ReduceEnd)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1413:   PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);
1414:   return(0);
1415: }
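
/*
   Illustrative sketch (not part of sf.c): accumulate leaf contributions into their
   roots, e.g. adding ghost values back to the owning process.  Incoming leaf values are
   combined with the existing rootdata entries, so zero rootdata first if a pure sum of
   the leaves is wanted.  rootdata and leafdata are hypothetical buffers; CHKERRQ()
   checks are omitted.

     PetscSFReduceBegin(sf,MPIU_SCALAR,leafdata,rootdata,MPIU_SUM);
     PetscSFReduceEnd(sf,MPIU_SCALAR,leafdata,rootdata,MPIU_SUM);
*/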

1417: /*@C
1418:    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

1420:    Collective

1422:    Input Arguments:
1423: +  sf - star forest
1424: .  unit - data type
1425: .  leafdata - leaf values to use in reduction
1426: -  op - operation to use for reduction

1428:    Output Arguments:
1429: +  rootdata - root values to be updated, input state is seen by first process to perform an update
1430: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1432:    Level: advanced

1434:    Note:
1435:    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1436:    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1437:    not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1438:    integers.

1440: .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1441: @*/
1442: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1443: {
1445:   PetscMemType   rootmtype,leafmtype,leafupdatemtype;

1449:   PetscSFSetUp(sf);
1450:   PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1451:   PetscGetMemType(rootdata,&rootmtype);
1452:   PetscGetMemType(leafdata,&leafmtype);
1453:   PetscGetMemType(leafupdate,&leafupdatemtype);
1454:   if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1455: #if defined(PETSC_HAVE_CUDA)
1456:   if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);}
1457: #endif
1458:   (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);
1459:   PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1460:   return(0);
1461: }

1463: /*@C
1464:    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

1466:    Collective

1468:    Input Arguments:
1469: +  sf - star forest
1470: .  unit - data type
1471: .  leafdata - leaf values to use in reduction
1472: -  op - operation to use for reduction

1474:    Output Arguments:
1475: +  rootdata - root values to be updated, input state is seen by first process to perform an update
1476: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1478:    Level: advanced

1480: .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1481: @*/
1482: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1483: {
1485:   PetscMemType   rootmtype,leafmtype,leafupdatemtype;

1489:   PetscSFSetUp(sf);
1490:   PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1491:   PetscGetMemType(rootdata,&rootmtype);
1492:   PetscGetMemType(leafdata,&leafmtype);
1493:   PetscGetMemType(leafupdate,&leafupdatemtype);
1494:   if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1495:   (*sf->ops->FetchAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);
1496:   PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1497:   return(0);
1498: }
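
/*
   Illustrative sketch (not part of sf.c): fetch-and-add, the typical use of this pair
   (as PetscSFGetMultiSF() does internally to assign offsets).  Each leaf atomically
   adds its value to the root counter and receives the counter value from just before
   its own update.  rootdata, leafdata and leafupdate are hypothetical buffers;
   CHKERRQ() checks are omitted.

     // rootdata[]  : running counters, one per root, initialized by the caller
     // leafdata[]  : increment contributed by each leaf (e.g. all ones)
     // leafupdate[]: on return, the pre-update counter value seen by each leaf
     PetscSFFetchAndOpBegin(sf,MPIU_INT,rootdata,leafdata,leafupdate,MPI_SUM);
     PetscSFFetchAndOpEnd(sf,MPIU_INT,rootdata,leafdata,leafupdate,MPI_SUM);
*/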

1500: /*@C
1501:    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

1503:    Collective

1505:    Input Arguments:
1506: .  sf - star forest

1508:    Output Arguments:
1509: .  degree - degree of each root vertex

1511:    Level: advanced

1513:    Notes:
1514:    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

1516: .seealso: PetscSFGatherBegin()
1517: @*/
1518: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1519: {

1524:   PetscSFCheckGraphSet(sf,1);
1526:   if (!sf->degreeknown) {
1527:     PetscInt i, nroots = sf->nroots, maxlocal = sf->maxleaf+1;  /* TODO: We should use PetscSFGetLeafRange() */
1528:     if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1529:     PetscMalloc1(nroots,&sf->degree);
1530:     PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1531:     for (i=0; i<nroots; i++) sf->degree[i] = 0;
1532:     for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1533:     PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);
1534:   }
1535:   *degree = NULL;
1536:   return(0);
1537: }

1539: /*@C
1540:    PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

1542:    Collective

1544:    Input Arguments:
1545: .  sf - star forest

1547:    Output Arguments:
1548: .  degree - degree of each root vertex

1550:    Level: developer

1552:    Notes:
1553:    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

1555: .seealso: PetscSFComputeDegreeBegin()
1556: @*/
1557: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
1558: {

1563:   PetscSFCheckGraphSet(sf,1);
1565:   if (!sf->degreeknown) {
1566:     if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1567:     PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);
1568:     PetscFree(sf->degreetmp);
1569:     sf->degreeknown = PETSC_TRUE;
1570:   }
1571:   *degree = sf->degree;
1572:   return(0);
1573: }
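
/* Illustrative usage sketch (added commentary, not part of sf.c): the Begin/End pair below shows
   the intended calling sequence; the helper name is hypothetical. */
static PetscErrorCode ExampleComputeDegree_Sketch(PetscSF sf)
{
  const PetscInt *degree;
  PetscInt        nroots;

  PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
  PetscSFComputeDegreeBegin(sf,&degree);
  /* unrelated local work may overlap the reduction here */
  PetscSFComputeDegreeEnd(sf,&degree);
  /* degree[0..nroots-1] gives the number of leaves attached to each root; the array is owned by
     sf and freed by PetscSFDestroy(), so it must not be PetscFree()d */
  return(0);
}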


1576: /*@C
1577:    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1578:    Each multi-root is assigned the index of the corresponding original root.

1580:    Collective

1582:    Input Arguments:
1583: +  sf - star forest
1584: -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

1586:    Output Arguments:
1587: +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1588: -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

1590:    Level: developer

1592:    Notes:
1593:    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

1595: .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1596: @*/
1597: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1598: {
1599:   PetscSF             msf;
1600:   PetscInt            i, j, k, nroots, nmroots;
1601:   PetscErrorCode      ierr;

1605:   PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
1609:   PetscSFGetMultiSF(sf,&msf);
1610:   PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);
1611:   PetscMalloc1(nmroots, multiRootsOrigNumbering);
1612:   for (i=0,j=0,k=0; i<nroots; i++) {
1613:     if (!degree[i]) continue;
1614:     for (j=0; j<degree[i]; j++,k++) {
1615:       (*multiRootsOrigNumbering)[k] = i;
1616:     }
1617:   }
1618: #if defined(PETSC_USE_DEBUG)
1619:   if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Sanity check failed: sum of root degrees does not match the number of multi-roots");
1620: #endif
1621:   if (nMultiRoots) *nMultiRoots = nmroots;
1622:   return(0);
1623: }
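
/* Illustrative usage sketch (added commentary, not part of sf.c): obtain the root degrees, then
   map multi-roots back to their original root indices; names are hypothetical. */
static PetscErrorCode ExampleMultiRootNumbering_Sketch(PetscSF sf)
{
  const PetscInt *degree;
  PetscInt        nmroots,*morig;

  PetscSFComputeDegreeBegin(sf,&degree);
  PetscSFComputeDegreeEnd(sf,&degree);
  PetscSFComputeMultiRootOriginalNumbering(sf,degree,&nmroots,&morig);
  /* morig[m] is the original root index of multi-root m.  Unlike degree[], this array is
     allocated for the caller and must be freed. */
  PetscFree(morig);
  return(0);
}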

1625: /*@C
1626:    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

1628:    Collective

1630:    Input Arguments:
1631: +  sf - star forest
1632: .  unit - data type
1633: -  leafdata - leaf data to gather to roots

1635:    Output Argument:
1636: .  multirootdata - root buffer to gather into; the amount of space per root equals its degree

1638:    Level: intermediate

1640: .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1641: @*/
1642: PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1643: {
1645:   PetscSF        multi;

1649:   PetscSFSetUp(sf);
1650:   PetscSFGetMultiSF(sf,&multi);
1651:   PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);
1652:   return(0);
1653: }

1655: /*@C
1656:    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

1658:    Collective

1660:    Input Arguments:
1661: +  sf - star forest
1662: .  unit - data type
1663: -  leafdata - leaf data to gather to roots

1665:    Output Argument:
1666: .  multirootdata - root buffer to gather into; the amount of space per root equals its degree

1668:    Level: intermediate

1670: .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1671: @*/
1672: PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1673: {
1675:   PetscSF        multi;

1679:   PetscSFSetUp(sf);
1680:   PetscSFGetMultiSF(sf,&multi);
1681:   PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);
1682:   return(0);
1683: }
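
/* Illustrative usage sketch (added commentary, not part of sf.c): size the multi-root buffer from
   the root degrees, then gather one unit per leaf.  The helper name and the use of PetscScalar
   data are hypothetical. */
static PetscErrorCode ExampleGather_Sketch(PetscSF sf,const PetscScalar *leafdata)
{
  const PetscInt *degree;
  PetscInt        i,nroots,nmulti = 0;
  PetscScalar    *multirootdata;

  PetscSFComputeDegreeBegin(sf,&degree);
  PetscSFComputeDegreeEnd(sf,&degree);
  PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
  for (i=0; i<nroots; i++) nmulti += degree[i]; /* one slot per incoming leaf */
  PetscMalloc1(nmulti,&multirootdata);
  PetscSFGatherBegin(sf,MPIU_SCALAR,leafdata,multirootdata);
  PetscSFGatherEnd(sf,MPIU_SCALAR,leafdata,multirootdata);
  /* multirootdata holds each root's incoming leaf values contiguously, degree[i] entries per root */
  PetscFree(multirootdata);
  return(0);
}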

1685: /*@C
1686:    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

1688:    Collective

1690:    Input Arguments:
1691: +  sf - star forest
1692: .  unit - data type
1693: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1695:    Output Argument:
1696: .  leafdata - leaf data to be updated with personal data from each respective root

1698:    Level: intermediate

1700: .seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
1701: @*/
1702: PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1703: {
1705:   PetscSF        multi;

1709:   PetscSFSetUp(sf);
1710:   PetscSFGetMultiSF(sf,&multi);
1711:   PetscSFBcastBegin(multi,unit,multirootdata,leafdata);
1712:   return(0);
1713: }

1715: /*@C
1716:    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

1718:    Collective

1720:    Input Arguments:
1721: +  sf - star forest
1722: .  unit - data type
1723: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1725:    Output Argument:
1726: .  leafdata - leaf data to be updated with personal data from each respective root

1728:    Level: intermediate

1730: .seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd()
1731: @*/
1732: PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1733: {
1735:   PetscSF        multi;

1739:   PetscSFSetUp(sf);
1740:   PetscSFGetMultiSF(sf,&multi);
1741:   PetscSFBcastEnd(multi,unit,multirootdata,leafdata);
1742:   return(0);
1743: }
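
/* Illustrative usage sketch (added commentary, not part of sf.c): scatter is the inverse of the
   gather above; multirootdata is laid out with degree[i] entries per root and each leaf receives
   the single entry addressed to it.  Names and the use of PetscScalar data are hypothetical. */
static PetscErrorCode ExampleScatter_Sketch(PetscSF sf,const PetscScalar *multirootdata,PetscScalar *leafdata)
{
  PetscSFScatterBegin(sf,MPIU_SCALAR,multirootdata,leafdata);
  PetscSFScatterEnd(sf,MPIU_SCALAR,multirootdata,leafdata);
  return(0);
}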

1745: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1746: {
1747: #if defined(PETSC_USE_DEBUG)
1748:   PetscInt        i, n, nleaves;
1749:   const PetscInt *ilocal = NULL;
1750:   PetscHSetI      seen;
1751:   PetscErrorCode  ierr;

1754:   PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);
1755:   PetscHSetICreate(&seen);
1756:   for (i = 0; i < nleaves; i++) {
1757:     const PetscInt leaf = ilocal ? ilocal[i] : i;
1758:     PetscHSetIAdd(seen,leaf);
1759:   }
1760:   PetscHSetIGetSize(seen,&n);
1761:   if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1762:   PetscHSetIDestroy(&seen);
1763:   return(0);
1764: #else
1766:   return(0);
1767: #endif
1768: }
1769: /*@
1770:   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

1772:   Input Parameters:
1773: + sfA - The first PetscSF, whose local space may be a permutation but cannot be sparse.
1774: - sfB - The second PetscSF, whose number of roots must equal the number of leaves of sfA on each process

1776:   Output Parameters:
1777: . sfBA - The composite SF. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then a Bcast on sfB

1779:   Level: developer

1781:   Notes:
1782:   For the resulting composed SF to be valid, the input SFs must be true star forests: the leaves must be unique. This is
1783:   checked in debug mode.

1785: .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
1786: @*/
1787: PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1788: {
1789:   PetscErrorCode    ierr;
1790:   MPI_Comm          comm;
1791:   const PetscSFNode *remotePointsA,*remotePointsB;
1792:   PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
1793:   const PetscInt    *localPointsA,*localPointsB,*localPointsBA;
1794:   PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf;

1798:   PetscSFCheckGraphSet(sfA,1);
1800:   PetscSFCheckGraphSet(sfB,2);
1802:   PetscObjectGetComm((PetscObject)sfA,&comm);
1803:   PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);
1804:   PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);
1805:   PetscSFGetLeafRange(sfA,&minleaf,&maxleaf);
1806:   if (numRootsB != numLeavesA) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The second SF's number of roots must be equal to the first SF's number of leaves");
1807:   if (maxleaf+1 != numLeavesA || minleaf) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The first SF cannot have a sparse local space");
1808:   /* The above check is fast, but not sufficient, since we cannot guarantee that the SF has unique leaves. So in debug
1809:    mode, check properly. */
1810:   PetscSFCheckLeavesUnique_Private(sfA);
1811:   if (localPointsA) {
1812:     /* Local space is a dense permutation of the identity. Need to rewire the order of the remote points */
1813:     PetscMalloc1(numLeavesA,&reorderedRemotePointsA);
1814:     for (i=0; i<numLeavesA; i++) reorderedRemotePointsA[localPointsA[i]-minleaf] = remotePointsA[i];
1815:     remotePointsA = reorderedRemotePointsA;
1816:   }
1817:   PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);
1818:   PetscMalloc1(maxleaf-minleaf+1,&leafdataB);
1819:   PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);
1820:   PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);
1821:   PetscFree(reorderedRemotePointsA);

1823:   /* sfB's leaves must be unique, otherwise BcastAndOp(B o A) != BcastAndOp(B) o BcastAndOp(A) */
1824:   PetscSFCheckLeavesUnique_Private(sfB);
1825:   if (minleaf == 0 && maxleaf + 1 == numLeavesB) { /* Local space of sfB is an identity or permutation */
1826:     localPointsBA  = NULL;
1827:     remotePointsBA = leafdataB;
1828:   } else {
1829:     localPointsBA  = localPointsB;
1830:     PetscMalloc1(numLeavesB,&remotePointsBA);
1831:     for (i=0; i<numLeavesB; i++) remotePointsBA[i] = leafdataB[localPointsB[i]-minleaf];
1832:     PetscFree(leafdataB);
1833:   }
1834:   PetscSFCreate(comm, sfBA);
1835:   PetscSFSetGraph(*sfBA,numRootsA,numLeavesB,localPointsBA,PETSC_COPY_VALUES,remotePointsBA,PETSC_OWN_POINTER);
1836:   return(0);
1837: }
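
/* Illustrative usage sketch (added commentary, not part of sf.c): one Bcast on the composite is
   equivalent to a Bcast on sfA followed by a Bcast on sfB.  The helper name and the use of
   PetscScalar data are hypothetical. */
static PetscErrorCode ExampleComposeBcast_Sketch(PetscSF sfA,PetscSF sfB,const PetscScalar *rootdataA,PetscScalar *leafdataB)
{
  PetscSF sfBA;

  PetscSFCompose(sfA,sfB,&sfBA);
  PetscSFBcastBegin(sfBA,MPIU_SCALAR,rootdataA,leafdataB);
  PetscSFBcastEnd(sfBA,MPIU_SCALAR,rootdataA,leafdataB);
  PetscSFDestroy(&sfBA);
  return(0);
}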

1839: /*@
1840:   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

1842:   Input Parameters:
1843: + sfA - The first PetscSF, whose local space may be a permutation but cannot be sparse.
1844: - sfB - The second PetscSF, whose number of leaves must equal the number of leaves of sfA on each process. All roots must have degree 1.

1846:   Output Parameters:
1847: . sfBA - The composite SF. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then a Bcast on the inverse of sfB

1849:   Level: developer

1851: .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph()
1852: @*/
1853: PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1854: {
1855:   PetscErrorCode    ierr;
1856:   MPI_Comm          comm;
1857:   const PetscSFNode *remotePointsA,*remotePointsB;
1858:   PetscSFNode       *remotePointsBA;
1859:   const PetscInt    *localPointsA,*localPointsB;
1860:   PetscInt          numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf;

1864:   PetscSFCheckGraphSet(sfA,1);
1866:   PetscSFCheckGraphSet(sfB,2);
1868:   PetscObjectGetComm((PetscObject) sfA, &comm);
1869:   PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);
1870:   PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);
1871:   PetscSFGetLeafRange(sfA,&minleaf,&maxleaf);
1872:   if (maxleaf+1-minleaf != numLeavesA) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The first SF cannot have a sparse local space");
1873:   if (numLeavesA != numLeavesB) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The second SF's number of leaves must be equal to the first SF's number of leaves");
1874:   PetscMalloc1(numRootsB,&remotePointsBA);
1875:   PetscSFReduceBegin(sfB,MPIU_2INT,remotePointsA,remotePointsBA,MPIU_REPLACE);
1876:   PetscSFReduceEnd(sfB,MPIU_2INT,remotePointsA,remotePointsBA,MPIU_REPLACE);
1877:   PetscSFCreate(comm,sfBA);
1878:   PetscSFSetGraph(*sfBA,numRootsA,numRootsB,NULL,PETSC_COPY_VALUES,remotePointsBA,PETSC_OWN_POINTER);
1879:   return(0);
1880: }
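
/* Illustrative usage sketch (added commentary, not part of sf.c): the composite's leaves are the
   roots of sfB, so a Bcast deposits sfA's root data at the corresponding roots of sfB.  Names and
   the use of PetscScalar data are hypothetical. */
static PetscErrorCode ExampleComposeInverse_Sketch(PetscSF sfA,PetscSF sfB,const PetscScalar *rootdataA,PetscScalar *rootdataB)
{
  PetscSF sfBA;

  PetscSFComposeInverse(sfA,sfB,&sfBA);
  PetscSFBcastBegin(sfBA,MPIU_SCALAR,rootdataA,rootdataB);
  PetscSFBcastEnd(sfBA,MPIU_SCALAR,rootdataA,rootdataB);
  PetscSFDestroy(&sfBA);
  return(0);
}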

1882: /*
1883:   PetscSFCreateLocalSF_Private - Creates a local PetscSF that has only the intra-process edges of the global PetscSF

1885:   Input Parameters:
1886: . sf - The global PetscSF

1888:   Output Parameters:
1889: . out - The local PetscSF
1890:  */
1891: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
1892: {
1893:   MPI_Comm           comm;
1894:   PetscMPIInt        myrank;
1895:   const PetscInt     *ilocal;
1896:   const PetscSFNode  *iremote;
1897:   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
1898:   PetscSFNode        *liremote;
1899:   PetscSF            lsf;
1900:   PetscErrorCode     ierr;

1904:   if (sf->ops->CreateLocalSF) {
1905:     (*sf->ops->CreateLocalSF)(sf,out);
1906:   } else {
1907:     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
1908:     PetscObjectGetComm((PetscObject)sf,&comm);
1909:     MPI_Comm_rank(comm,&myrank);

1911:     /* Find out local edges and build a local SF */
1912:     PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
1913:     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
1914:     PetscMalloc1(lnleaves,&lilocal);
1915:     PetscMalloc1(lnleaves,&liremote);

1917:     for (i=j=0; i<nleaves; i++) {
1918:       if (iremote[i].rank == (PetscInt)myrank) {
1919:         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
1920:         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
1921:         liremote[j].index = iremote[i].index;
1922:         j++;
1923:       }
1924:     }
1925:     PetscSFCreate(PETSC_COMM_SELF,&lsf);
1926:     PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);
1927:     PetscSFSetUp(lsf);
1928:     *out = lsf;
1929:   }
1930:   return(0);
1931: }

1933: /* Similar to PetscSFBcast, but only broadcasts to leaves on rank 0 */
1934: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
1935: {
1936:   PetscErrorCode     ierr;
1937:   PetscMemType       rootmtype,leafmtype;

1941:   PetscSFSetUp(sf);
1942:   PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1943:   PetscGetMemType(rootdata,&rootmtype);
1944:   PetscGetMemType(leafdata,&leafmtype);
1945:   if (sf->ops->BcastToZero) {
1946:     (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);
1947:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
1948:   PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1949:   return(0);
1950: }