Actual source code: sf.c

  1: #include <petsc/private/sfimpl.h>
  2: #include <petsc/private/hashseti.h>
  3: #include <petsc/private/viewerimpl.h>
  4: #include <petscctable.h>

  6: #if defined(PETSC_HAVE_CUDA)
  7:   #include <cuda_runtime.h>
  8: #endif

 10: #if defined(PETSC_HAVE_HIP)
 11:   #include <hip/hip_runtime.h>
 12: #endif

 14: #if defined(PETSC_CLANG_STATIC_ANALYZER)
 15: void PetscSFCheckGraphSet(PetscSF,int);
 16: #else
 17: #if defined(PETSC_USE_DEBUG)
 18: #  define PetscSFCheckGraphSet(sf,arg) do {                          \
 19:     if (PetscUnlikely(!(sf)->graphset))                              \
 20:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
 21:   } while (0)
 22: #else
 23: #  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
 24: #endif
 25: #endif

 27: const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};

 29: /*@
 30:    PetscSFCreate - create a star forest communication context

 32:    Collective

 34:    Input Parameter:
 35: .  comm - communicator on which the star forest will operate

 37:    Output Parameter:
 38: .  sf - new star forest context

 40:    Options Database Keys:
 41: +  -sf_type basic     - Use MPI persistent Isend/Irecv for communication (Default)
 42: .  -sf_type window    - Use MPI-3 one-sided window for communication
 43: -  -sf_type neighbor  - Use MPI-3 neighborhood collectives for communication

 45:    Level: intermediate

 47:    Notes:
 48:    When one knows the communication graph is one of the predefined patterns, such as MPI_Alltoall, MPI_Allgatherv, or
 49:    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
 50:    SFs are optimized and have better performance than general SFs.

 52: .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
 53: @*/
 54: PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
 55: {
 56:   PetscSF        b;

 59:   PetscSFInitializePackage();

 61:   PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);

 63:   b->nroots    = -1;
 64:   b->nleaves   = -1;
 65:   b->minleaf   = PETSC_MAX_INT;
 66:   b->maxleaf   = PETSC_MIN_INT;
 67:   b->nranks    = -1;
 68:   b->rankorder = PETSC_TRUE;
 69:   b->ingroup   = MPI_GROUP_NULL;
 70:   b->outgroup  = MPI_GROUP_NULL;
 71:   b->graphset  = PETSC_FALSE;
 72: #if defined(PETSC_HAVE_DEVICE)
 73:   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
 74:   b->use_stream_aware_mpi = PETSC_FALSE;
 75:   b->unknown_input_stream = PETSC_FALSE;
 76:   #if defined(PETSC_HAVE_KOKKOS) /* Prefer the kokkos backend over cuda or hip */
 77:     b->backend = PETSCSF_BACKEND_KOKKOS;
 78:   #elif defined(PETSC_HAVE_CUDA)
 79:     b->backend = PETSCSF_BACKEND_CUDA;
 80:   #elif defined(PETSC_HAVE_HIP)
 81:     b->backend = PETSCSF_BACKEND_HIP;
 82:   #endif

 84:   #if defined(PETSC_HAVE_NVSHMEM)
 85:     b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
 86:     b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
 87:     PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL);
 88:     PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL);
 89:   #endif
 90: #endif
 91:   b->vscat.from_n = -1;
 92:   b->vscat.to_n   = -1;
 93:   b->vscat.unit   = MPIU_SCALAR;
 94:  *sf = b;
 95:   return 0;
 96: }
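
/*
   A minimal usage sketch of the PetscSF life cycle (names such as sf are placeholders;
   error checking is omitted for brevity):

     PetscSF sf;

     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     PetscSFSetFromOptions(sf);                  // pick up -sf_type, -sf_backend, ... from the options database
     // ... describe the graph with PetscSFSetGraph() or PetscSFSetGraphWithPattern() ...
     PetscSFSetUp(sf);                           // optional; the communication routines call it on demand
     PetscSFView(sf,PETSC_VIEWER_STDOUT_WORLD);
     PetscSFDestroy(&sf);
*/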

 98: /*@
 99:    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

101:    Collective

103:    Input Parameter:
104: .  sf - star forest

106:    Level: advanced

108: .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
109: @*/
110: PetscErrorCode PetscSFReset(PetscSF sf)
111: {
113:   if (sf->ops->Reset) (*sf->ops->Reset)(sf);
114:   sf->nroots   = -1;
115:   sf->nleaves  = -1;
116:   sf->minleaf  = PETSC_MAX_INT;
117:   sf->maxleaf  = PETSC_MIN_INT;
118:   sf->mine     = NULL;
119:   sf->remote   = NULL;
120:   sf->graphset = PETSC_FALSE;
121:   PetscFree(sf->mine_alloc);
122:   PetscFree(sf->remote_alloc);
123:   sf->nranks = -1;
124:   PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);
125:   sf->degreeknown = PETSC_FALSE;
126:   PetscFree(sf->degree);
127:   if (sf->ingroup  != MPI_GROUP_NULL) MPI_Group_free(&sf->ingroup);
128:   if (sf->outgroup != MPI_GROUP_NULL) MPI_Group_free(&sf->outgroup);
129:   if (sf->multi) sf->multi->multi = NULL;
130:   PetscSFDestroy(&sf->multi);
131:   PetscLayoutDestroy(&sf->map);

133:  #if defined(PETSC_HAVE_DEVICE)
134:   for (PetscInt i=0; i<2; i++) PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);
135:  #endif

137:   sf->setupcalled = PETSC_FALSE;
138:   return 0;
139: }

141: /*@C
142:    PetscSFSetType - Set the PetscSF communication implementation

144:    Collective on PetscSF

146:    Input Parameters:
147: +  sf - the PetscSF context
148: -  type - a known method

150:    Options Database Key:
151: .  -sf_type <type> - Sets the method; use -help for a list
152:    of available methods (for instance, window, basic, neighbor)

154:    Notes:
155:    See "include/petscsf.h" for available methods (for instance)
156: +    PETSCSFWINDOW - MPI-2/3 one-sided
157: -    PETSCSFBASIC - basic implementation using MPI-1 two-sided

159:   Level: intermediate

161: .seealso: PetscSFType, PetscSFCreate()
162: @*/
163: PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
164: {
165:   PetscBool      match;
166:   PetscErrorCode (*r)(PetscSF);


171:   PetscObjectTypeCompare((PetscObject)sf,type,&match);
172:   if (match) return 0;

174:   PetscFunctionListFind(PetscSFList,type,&r);
176:   /* Destroy the previous PetscSF implementation context */
177:   if (sf->ops->Destroy) (*(sf)->ops->Destroy)(sf);
178:   PetscMemzero(sf->ops,sizeof(*sf->ops));
179:   PetscObjectChangeTypeName((PetscObject)sf,type);
180:   (*r)(sf);
181:   return 0;
182: }

184: /*@C
185:   PetscSFGetType - Get the PetscSF communication implementation

187:   Not Collective

189:   Input Parameter:
190: . sf  - the PetscSF context

192:   Output Parameter:
193: . type - the PetscSF type name

195:   Level: intermediate

197: .seealso: PetscSFSetType(), PetscSFCreate()
198: @*/
199: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
200: {
203:   *type = ((PetscObject)sf)->type_name;
204:   return 0;
205: }

207: /*@C
208:    PetscSFDestroy - destroy star forest

210:    Collective

212:    Input Parameter:
213: .  sf - address of star forest

215:    Level: intermediate

217: .seealso: PetscSFCreate(), PetscSFReset()
218: @*/
219: PetscErrorCode PetscSFDestroy(PetscSF *sf)
220: {
221:   if (!*sf) return 0;
223:   if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; return 0;}
224:   PetscSFReset(*sf);
225:   if ((*sf)->ops->Destroy) (*(*sf)->ops->Destroy)(*sf);
226:   PetscSFDestroy(&(*sf)->vscat.lsf);
227:   if ((*sf)->vscat.bs > 1) MPI_Type_free(&(*sf)->vscat.unit);
228:   PetscHeaderDestroy(sf);
229:   return 0;
230: }

232: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
233: {
234:   PetscInt           i, nleaves;
235:   PetscMPIInt        size;
236:   const PetscInt    *ilocal;
237:   const PetscSFNode *iremote;

239:   if (!sf->graphset || !PetscDefined(USE_DEBUG)) return 0;
240:   PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);
241:   MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
242:   for (i = 0; i < nleaves; i++) {
243:     const PetscInt rank = iremote[i].rank;
244:     const PetscInt remote = iremote[i].index;
245:     const PetscInt leaf = ilocal ? ilocal[i] : i;
249:   }
250:   return 0;
251: }

253: /*@
254:    PetscSFSetUp - set up communication structures

256:    Collective

258:    Input Parameter:
259: .  sf - star forest communication object

261:    Level: beginner

263: .seealso: PetscSFSetFromOptions(), PetscSFSetType()
264: @*/
265: PetscErrorCode PetscSFSetUp(PetscSF sf)
266: {
268:   PetscSFCheckGraphSet(sf,1);
269:   if (sf->setupcalled) return 0;
270:   PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);
271:   PetscSFCheckGraphValid_Private(sf);
272:   if (!((PetscObject)sf)->type_name) PetscSFSetType(sf,PETSCSFBASIC); /* Zero all sf->ops */
273:   if (sf->ops->SetUp) (*sf->ops->SetUp)(sf);
274: #if defined(PETSC_HAVE_CUDA)
275:   if (sf->backend == PETSCSF_BACKEND_CUDA) {
276:     sf->ops->Malloc = PetscSFMalloc_CUDA;
277:     sf->ops->Free   = PetscSFFree_CUDA;
278:   }
279: #endif
280: #if defined(PETSC_HAVE_HIP)
281:   if (sf->backend == PETSCSF_BACKEND_HIP) {
282:     sf->ops->Malloc = PetscSFMalloc_HIP;
283:     sf->ops->Free   = PetscSFFree_HIP;
284:   }
285: #endif

288: #if defined(PETSC_HAVE_KOKKOS)
289:   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
290:     sf->ops->Malloc = PetscSFMalloc_Kokkos;
291:     sf->ops->Free   = PetscSFFree_Kokkos;
292:   }
293: #endif
294:   PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);
295:   sf->setupcalled = PETSC_TRUE;
296:   return 0;
297: }

299: /*@
300:    PetscSFSetFromOptions - set PetscSF options using the options database

302:    Logically Collective

304:    Input Parameter:
305: .  sf - star forest

307:    Options Database Keys:
308: +  -sf_type               - implementation type, see PetscSFSetType()
 309: .  -sf_rank_order         - sort composite points for gathers and scatters in rank order; gathers are non-deterministic otherwise
 310: .  -sf_unknown_input_stream - Assume the SF root/leafdata is computed on arbitrary GPU streams unknown to SF, so that SF
 311:                             synchronizes streams before processing the data (default: false).
 313: .  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
 314:                                If true, this option only works with -use_gpu_aware_mpi 1.

 316: -  -sf_backend cuda | hip | kokkos - Select the device backend SF uses. Currently SF has these backends: cuda, hip and kokkos.
 317:                               On CUDA (HIP) devices, one can choose cuda (hip) or kokkos, with kokkos being the default when PETSc is configured with Kokkos. On other devices,
 318:                               the only available backend is kokkos.

320:    Level: intermediate
321: @*/
322: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
323: {
324:   PetscSFType    deft;
325:   char           type[256];
327:   PetscBool      flg;

330:   PetscObjectOptionsBegin((PetscObject)sf);
331:   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
332:   PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);
333:   PetscSFSetType(sf,flg ? type : deft);
334:   PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);
335:  #if defined(PETSC_HAVE_DEVICE)
336:   {
337:     char        backendstr[32] = {0};
338:     PetscBool   isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
339:     /* Change the defaults set in PetscSFCreate() with command line options */
 340:     PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitrary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL);
341:     PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);
342:     PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);
343:     PetscStrcasecmp("cuda",backendstr,&isCuda);
344:     PetscStrcasecmp("kokkos",backendstr,&isKokkos);
345:     PetscStrcasecmp("hip",backendstr,&isHip);
346:   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
347:     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
348:     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
349:     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
351:   #elif defined(PETSC_HAVE_KOKKOS)
353:    #endif
354:   }
355:  #endif
356:   if (sf->ops->SetFromOptions) (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);
357:   PetscOptionsEnd();
358:   return 0;
359: }
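
/*
   For example, a program that calls PetscSFSetFromOptions() can select the implementation and,
   on device-enabled builds, the backend at run time, e.g.

     mpiexec -n 4 ./app -sf_type neighbor -sf_backend kokkos

   using the options documented above; the executable name ./app is a placeholder.
*/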

361: /*@
362:    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

364:    Logically Collective

366:    Input Parameters:
367: +  sf - star forest
368: -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

370:    Level: advanced

372: .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
373: @*/
374: PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
375: {
379:   sf->rankorder = flg;
380:   return 0;
381: }

383: /*@C
384:    PetscSFSetGraph - Set a parallel star forest

386:    Collective

388:    Input Parameters:
389: +  sf - star forest
390: .  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
391: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
392: .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
393: during setup in debug mode)
394: .  localmode - copy mode for ilocal
395: .  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
396: during setup in debug mode)
397: -  remotemode - copy mode for iremote

399:    Level: intermediate

401:    Notes:
 402:    Leaf indices in ilocal must be unique; otherwise an error occurs.

404:    Input arrays ilocal and iremote follow the PetscCopyMode semantics.
405:    In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER,
406:    PETSc might modify the respective array;
407:    if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy().
408:    Only if PETSC_COPY_VALUES is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).

410:    Fortran Notes:
411:    In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.

413:    Developer Notes:
 414:    We sort leaves to check for duplicates and contiguity and to find minleaf/maxleaf.
 415:    This also makes it easy to compare the leaf sets of two SFs.

417: .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
418: @*/
419: PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,PetscInt *ilocal,PetscCopyMode localmode,PetscSFNode *iremote,PetscCopyMode remotemode)
420: {
421:   PetscBool       unique, contiguous;


431:   if (sf->nroots >= 0) { /* Reset only if graph already set */
432:     PetscSFReset(sf);
433:   }

435:   PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);

437:   sf->nroots  = nroots;
438:   sf->nleaves = nleaves;

440:   if (localmode == PETSC_COPY_VALUES && ilocal) {
441:     PetscInt *tlocal = NULL;

443:     PetscMalloc1(nleaves,&tlocal);
444:     PetscArraycpy(tlocal,ilocal,nleaves);
445:     ilocal = tlocal;
446:   }
447:   if (remotemode == PETSC_COPY_VALUES) {
448:     PetscSFNode *tremote = NULL;

450:     PetscMalloc1(nleaves,&tremote);
451:     PetscArraycpy(tremote,iremote,nleaves);
452:     iremote = tremote;
453:   }

455:   if (nleaves && ilocal) {
456:     PetscSFNode   work;

458:     PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work);
459:     PetscSortedCheckDupsInt(nleaves, ilocal, &unique);
460:     unique = PetscNot(unique);
462:     sf->minleaf = ilocal[0];
463:     sf->maxleaf = ilocal[nleaves-1];
464:     contiguous = (PetscBool) (unique && ilocal[0] == 0 && ilocal[nleaves-1] == nleaves-1);
465:   } else {
466:     sf->minleaf = 0;
467:     sf->maxleaf = nleaves - 1;
468:     unique      = PETSC_TRUE;
469:     contiguous  = PETSC_TRUE;
470:   }

472:   if (contiguous) {
473:     if (localmode == PETSC_USE_POINTER) {
474:       ilocal = NULL;
475:     } else {
476:       PetscFree(ilocal);
477:     }
478:   }
479:   sf->mine            = ilocal;
480:   if (localmode == PETSC_USE_POINTER) {
481:     sf->mine_alloc    = NULL;
482:   } else {
483:     sf->mine_alloc    = ilocal;
484:   }
485:   sf->remote          = iremote;
486:   if (remotemode == PETSC_USE_POINTER) {
487:     sf->remote_alloc  = NULL;
488:   } else {
489:     sf->remote_alloc  = iremote;
490:   }
491:   PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);
492:   sf->graphset = PETSC_TRUE;
493:   return 0;
494: }
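
/*
   A usage sketch, assuming a ring in which each rank owns two roots and has one leaf that
   references root 0 on the next rank (error checking omitted):

     PetscMPIInt rank,size;
     PetscSFNode remote;
     PetscSF     sf;

     MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
     MPI_Comm_size(PETSC_COMM_WORLD,&size);
     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     remote.rank  = (rank + 1) % size;           // rank owning the referenced root
     remote.index = 0;                           // root index on that rank
     // nroots=2, nleaves=1; ilocal=NULL means the leaf sits in slot 0 of the leafdata buffer
     PetscSFSetGraph(sf,2,1,NULL,PETSC_COPY_VALUES,&remote,PETSC_COPY_VALUES);
     ...
     PetscSFDestroy(&sf);
*/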

496: /*@
497:   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

499:   Collective

501:   Input Parameters:
502: + sf      - The PetscSF
503: . map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
504: - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

506:   Notes:
507:   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
508:   n and N are local and global sizes of x respectively.

 510:   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph such that a Bcast on it copies x to
 511:   sequential vectors y on all ranks.

 513:   With PETSCSF_PATTERN_GATHER, the routine creates a graph such that a Bcast on it copies x to a
 514:   sequential vector y on rank 0.

 516:   In the above cases, entries of x are roots and entries of y are leaves.

 518:   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is the size of sf's communicator. The routine
 519:   creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
 520:   of rank j, for 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. This does
 521:   not mean one cannot send multiple items. One just needs to create a new MPI datatype for the multiple data
 522:   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

524:   In this case, roots and leaves are symmetric.

526:   Level: intermediate
527:  @*/
528: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
529: {
530:   MPI_Comm       comm;
531:   PetscInt       n,N,res[2];
532:   PetscMPIInt    rank,size;
533:   PetscSFType    type;

537:   PetscObjectGetComm((PetscObject)sf, &comm);
539:   MPI_Comm_rank(comm,&rank);
540:   MPI_Comm_size(comm,&size);

542:   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
543:     type = PETSCSFALLTOALL;
544:     PetscLayoutCreate(comm,&sf->map);
545:     PetscLayoutSetLocalSize(sf->map,size);
546:     PetscLayoutSetSize(sf->map,((PetscInt)size)*size);
547:     PetscLayoutSetUp(sf->map);
548:   } else {
549:     PetscLayoutGetLocalSize(map,&n);
550:     PetscLayoutGetSize(map,&N);
551:     res[0] = n;
552:     res[1] = -n;
 553:     /* Check if n is the same over all ranks so that we can optimize it */
554:     MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);
555:     if (res[0] == -res[1]) { /* same n */
556:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
557:     } else {
558:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
559:     }
560:     PetscLayoutReference(map,&sf->map);
561:   }
562:   PetscSFSetType(sf,type);

564:   sf->pattern = pattern;
565:   sf->mine    = NULL; /* Contiguous */

567:   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
568:      Also set other easy stuff.
569:    */
570:   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
571:     sf->nleaves      = N;
572:     sf->nroots       = n;
573:     sf->nranks       = size;
574:     sf->minleaf      = 0;
575:     sf->maxleaf      = N - 1;
576:   } else if (pattern == PETSCSF_PATTERN_GATHER) {
577:     sf->nleaves      = rank ? 0 : N;
578:     sf->nroots       = n;
579:     sf->nranks       = rank ? 0 : size;
580:     sf->minleaf      = 0;
581:     sf->maxleaf      = rank ? -1 : N - 1;
582:   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
583:     sf->nleaves      = size;
584:     sf->nroots       = size;
585:     sf->nranks       = size;
586:     sf->minleaf      = 0;
587:     sf->maxleaf      = size - 1;
588:   }
589:   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
590:   sf->graphset = PETSC_TRUE;
591:   return 0;
592: }
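
/*
   A usage sketch of the allgather pattern; comm, n, rootdata (length n) and leafdata (length N)
   are placeholders, with rootdata/leafdata assumed to be PetscScalar arrays (error checking omitted):

     PetscLayout map;
     PetscSF     sf;

     PetscLayoutCreate(comm,&map);
     PetscLayoutSetLocalSize(map,n);             // n roots on this rank; N is the sum over all ranks
     PetscLayoutSetUp(map);
     PetscSFCreate(comm,&sf);
     PetscSFSetGraphWithPattern(sf,map,PETSCSF_PATTERN_ALLGATHER);
     // a Bcast on this sf copies the distributed rootdata into leafdata on every rank
     PetscSFBcastBegin(sf,MPIU_SCALAR,rootdata,leafdata,MPI_REPLACE);
     PetscSFBcastEnd(sf,MPIU_SCALAR,rootdata,leafdata,MPI_REPLACE);
     PetscLayoutDestroy(&map);                   // sf keeps its own reference to the layout
     PetscSFDestroy(&sf);
*/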

594: /*@
595:    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

597:    Collective

599:    Input Parameter:
600: .  sf - star forest to invert

602:    Output Parameter:
603: .  isf - inverse of sf

605:    Level: advanced

607:    Notes:
608:    All roots must have degree 1.

610:    The local space may be a permutation, but cannot be sparse.

612: .seealso: PetscSFSetGraph()
613: @*/
614: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
615: {
616:   PetscMPIInt    rank;
617:   PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
618:   const PetscInt *ilocal;
619:   PetscSFNode    *roots,*leaves;

622:   PetscSFCheckGraphSet(sf,1);

625:   PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);
626:   maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

628:   MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
629:   PetscMalloc2(nroots,&roots,maxlocal,&leaves);
630:   for (i=0; i<maxlocal; i++) {
631:     leaves[i].rank  = rank;
632:     leaves[i].index = i;
633:   }
634:   for (i=0; i <nroots; i++) {
635:     roots[i].rank  = -1;
636:     roots[i].index = -1;
637:   }
638:   PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);
639:   PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);

641:   /* Check whether our leaves are sparse */
642:   for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
643:   if (count == nroots) newilocal = NULL;
644:   else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
645:     PetscMalloc1(count,&newilocal);
646:     for (i=0,count=0; i<nroots; i++) {
647:       if (roots[i].rank >= 0) {
648:         newilocal[count]   = i;
649:         roots[count].rank  = roots[i].rank;
650:         roots[count].index = roots[i].index;
651:         count++;
652:       }
653:     }
654:   }

656:   PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);
657:   PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);
658:   PetscFree2(roots,leaves);
659:   return 0;
660: }

662: /*@
663:    PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

665:    Collective

667:    Input Parameters:
668: +  sf - communication object to duplicate
669: -  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

671:    Output Parameter:
672: .  newsf - new communication object

674:    Level: beginner

676: .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
677: @*/
678: PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
679: {
680:   PetscSFType    type;
681:   MPI_Datatype   dtype=MPIU_SCALAR;

686:   PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);
687:   PetscSFGetType(sf,&type);
688:   if (type) PetscSFSetType(*newsf,type);
 689:   (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag early since PetscSFSetGraph() below checks it */
690:   if (opt == PETSCSF_DUPLICATE_GRAPH) {
691:     PetscSFCheckGraphSet(sf,1);
692:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
693:       PetscInt          nroots,nleaves;
694:       const PetscInt    *ilocal;
695:       const PetscSFNode *iremote;
696:       PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
697:       PetscSFSetGraph(*newsf,nroots,nleaves,(PetscInt*)ilocal,PETSC_COPY_VALUES,(PetscSFNode*)iremote,PETSC_COPY_VALUES);
698:     } else {
699:       PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);
700:     }
701:   }
702:   /* Since oldtype is committed, so is newtype, according to MPI */
703:   if (sf->vscat.bs > 1) MPI_Type_dup(sf->vscat.unit,&dtype);
704:   (*newsf)->vscat.bs     = sf->vscat.bs;
705:   (*newsf)->vscat.unit   = dtype;
706:   (*newsf)->vscat.to_n   = sf->vscat.to_n;
707:   (*newsf)->vscat.from_n = sf->vscat.from_n;
708:   /* Do not copy lsf. Build it on demand since it is rarely used */

710: #if defined(PETSC_HAVE_DEVICE)
711:   (*newsf)->backend              = sf->backend;
712:   (*newsf)->unknown_input_stream= sf->unknown_input_stream;
713:   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
714:   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
715: #endif
716:   if (sf->ops->Duplicate) (*sf->ops->Duplicate)(sf,opt,*newsf);
717:   /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
718:   return 0;
719: }
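
/*
   For example, to obtain an independent SF with the same graph (a sketch; error checking omitted):

     PetscSF copy;

     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_GRAPH,&copy);
     ...
     PetscSFDestroy(&copy);
*/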

721: /*@C
722:    PetscSFGetGraph - Get the graph specifying a parallel star forest

724:    Not Collective

726:    Input Parameter:
727: .  sf - star forest

729:    Output Parameters:
730: +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
731: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
732: .  ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
733: -  iremote - remote locations of root vertices for each leaf on the current process

735:    Notes:
 736:      We do not currently require that the graph be set, thus nroots is returned as -1 if it has not been set yet

738:      The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(),
739:      see its manpage for details.

741:    Fortran Notes:
742:      The returned iremote array is a copy and must be deallocated after use. Consequently, if you
743:      want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.

745:      To check for a NULL ilocal use
746: $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then

748:    Level: intermediate

750: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
751: @*/
752: PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
753: {
755:   if (sf->ops->GetGraph) {
756:     (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);
757:   } else {
758:     if (nroots) *nroots = sf->nroots;
759:     if (nleaves) *nleaves = sf->nleaves;
760:     if (ilocal) *ilocal = sf->mine;
761:     if (iremote) *iremote = sf->remote;
762:   }
763:   return 0;
764: }
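
/*
   A sketch of traversing the graph on the calling process (error checking omitted):

     PetscInt          nroots,nleaves,i;
     const PetscInt    *ilocal;
     const PetscSFNode *iremote;

     PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
     for (i=0; i<nleaves; i++) {
       PetscInt leaf = ilocal ? ilocal[i] : i;   // location of this leaf in leafdata buffers
       // leaf is connected to root iremote[i].index owned by rank iremote[i].rank
     }
*/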

766: /*@
767:    PetscSFGetLeafRange - Get the active leaf ranges

769:    Not Collective

771:    Input Parameter:
772: .  sf - star forest

774:    Output Parameters:
775: +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
776: -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.

778:    Level: developer

780: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
781: @*/
782: PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
783: {
785:   PetscSFCheckGraphSet(sf,1);
786:   if (minleaf) *minleaf = sf->minleaf;
787:   if (maxleaf) *maxleaf = sf->maxleaf;
788:   return 0;
789: }

791: /*@C
 792:    PetscSFViewFromOptions - View a PetscSF based on values in the options database

794:    Collective on PetscSF

796:    Input Parameters:
797: +  A - the star forest
798: .  obj - Optional object
799: -  name - command line option

801:    Level: intermediate
802: .seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
803: @*/
804: PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
805: {
807:   PetscObjectViewFromOptions((PetscObject)A,obj,name);
808:   return 0;
809: }

811: /*@C
812:    PetscSFView - view a star forest

814:    Collective

816:    Input Parameters:
817: +  sf - star forest
818: -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

820:    Level: beginner

822: .seealso: PetscSFCreate(), PetscSFSetGraph()
823: @*/
824: PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
825: {
826:   PetscBool         iascii;
827:   PetscViewerFormat format;

830:   if (!viewer) PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);
833:   if (sf->graphset) PetscSFSetUp(sf);
834:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
835:   if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
836:     PetscMPIInt rank;
837:     PetscInt    ii,i,j;

839:     PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);
840:     PetscViewerASCIIPushTab(viewer);
841:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
842:       if (!sf->graphset) {
843:         PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");
844:         PetscViewerASCIIPopTab(viewer);
845:         return 0;
846:       }
847:       MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
848:       PetscViewerASCIIPushSynchronized(viewer);
849:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks);
850:       for (i=0; i<sf->nleaves; i++) {
851:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);
852:       }
853:       PetscViewerFlush(viewer);
854:       PetscViewerGetFormat(viewer,&format);
855:       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
856:         PetscMPIInt *tmpranks,*perm;
857:         PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);
858:         PetscArraycpy(tmpranks,sf->ranks,sf->nranks);
859:         for (i=0; i<sf->nranks; i++) perm[i] = i;
860:         PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);
861:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);
862:         for (ii=0; ii<sf->nranks; ii++) {
863:           i = perm[ii];
864:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);
865:           for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
866:             PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j]);
867:           }
868:         }
869:         PetscFree2(tmpranks,perm);
870:       }
871:       PetscViewerFlush(viewer);
872:       PetscViewerASCIIPopSynchronized(viewer);
873:     }
874:     PetscViewerASCIIPopTab(viewer);
875:   }
876:   if (sf->ops->View) (*sf->ops->View)(sf,viewer);
877:   return 0;
878: }

880: /*@C
881:    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

883:    Not Collective

885:    Input Parameter:
886: .  sf - star forest

888:    Output Parameters:
889: +  nranks - number of ranks referenced by local part
890: .  ranks - array of ranks
891: .  roffset - offset in rmine/rremote for each rank (length nranks+1)
892: .  rmine - concatenated array holding local indices referencing each remote rank
893: -  rremote - concatenated array holding remote indices referenced for each remote rank

895:    Level: developer

897: .seealso: PetscSFGetLeafRanks()
898: @*/
899: PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
900: {
903:   if (sf->ops->GetRootRanks) {
904:     (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);
905:   } else {
906:     /* The generic implementation */
907:     if (nranks)  *nranks  = sf->nranks;
908:     if (ranks)   *ranks   = sf->ranks;
909:     if (roffset) *roffset = sf->roffset;
910:     if (rmine)   *rmine   = sf->rmine;
911:     if (rremote) *rremote = sf->rremote;
912:   }
913:   return 0;
914: }
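
/*
   A sketch of walking the CSR-like output (error checking omitted): for each remote rank,
   roffset[] brackets the entries of rmine[]/rremote[] that involve it.

     PetscInt           nranks,i,j;
     const PetscMPIInt *ranks;
     const PetscInt    *roffset,*rmine,*rremote;

     PetscSFGetRootRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);
     for (i=0; i<nranks; i++) {
       for (j=roffset[i]; j<roffset[i+1]; j++) {
         // local leaf location rmine[j] references root rremote[j] on rank ranks[i]
       }
     }
*/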

916: /*@C
917:    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

919:    Not Collective

921:    Input Parameter:
922: .  sf - star forest

924:    Output Parameters:
925: +  niranks - number of leaf ranks referencing roots on this process
926: .  iranks - array of ranks
927: .  ioffset - offset in irootloc for each rank (length niranks+1)
928: -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

930:    Level: developer

932: .seealso: PetscSFGetRootRanks()
933: @*/
934: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
935: {
938:   if (sf->ops->GetLeafRanks) {
939:     (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);
940:   } else {
941:     PetscSFType type;
942:     PetscSFGetType(sf,&type);
943:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
944:   }
945:   return 0;
946: }

948: static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
949:   PetscInt i;
950:   for (i=0; i<n; i++) {
951:     if (needle == list[i]) return PETSC_TRUE;
952:   }
953:   return PETSC_FALSE;
954: }

956: /*@C
957:    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

959:    Collective

961:    Input Parameters:
962: +  sf - PetscSF to set up; PetscSFSetGraph() must have been called
963: -  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

965:    Level: developer

967: .seealso: PetscSFGetRootRanks()
968: @*/
969: PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
970: {
971:   PetscTable         table;
972:   PetscTablePosition pos;
973:   PetscMPIInt        size,groupsize,*groupranks;
974:   PetscInt           *rcount,*ranks;
975:   PetscInt           i, irank = -1,orank = -1;

978:   PetscSFCheckGraphSet(sf,1);
979:   MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
980:   PetscTableCreate(10,size,&table);
981:   for (i=0; i<sf->nleaves; i++) {
982:     /* Log 1-based rank */
983:     PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);
984:   }
985:   PetscTableGetCount(table,&sf->nranks);
986:   PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);
987:   PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);
988:   PetscTableGetHeadPosition(table,&pos);
989:   for (i=0; i<sf->nranks; i++) {
990:     PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);
991:     ranks[i]--;             /* Convert back to 0-based */
992:   }
993:   PetscTableDestroy(&table);

995:   /* We expect that dgroup is reliably "small" while nranks could be large */
996:   {
997:     MPI_Group group = MPI_GROUP_NULL;
998:     PetscMPIInt *dgroupranks;
999:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1000:     MPI_Group_size(dgroup,&groupsize);
1001:     PetscMalloc1(groupsize,&dgroupranks);
1002:     PetscMalloc1(groupsize,&groupranks);
1003:     for (i=0; i<groupsize; i++) dgroupranks[i] = i;
1004:     if (groupsize) MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);
1005:     MPI_Group_free(&group);
1006:     PetscFree(dgroupranks);
1007:   }

1009:   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1010:   for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
1011:     for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
1012:       if (InList(ranks[i],groupsize,groupranks)) break;
1013:     }
1014:     for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1015:       if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
1016:     }
1017:     if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
1018:       PetscInt tmprank,tmpcount;

1020:       tmprank             = ranks[i];
1021:       tmpcount            = rcount[i];
1022:       ranks[i]            = ranks[sf->ndranks];
1023:       rcount[i]           = rcount[sf->ndranks];
1024:       ranks[sf->ndranks]  = tmprank;
1025:       rcount[sf->ndranks] = tmpcount;
1026:       sf->ndranks++;
1027:     }
1028:   }
1029:   PetscFree(groupranks);
1030:   PetscSortIntWithArray(sf->ndranks,ranks,rcount);
1031:   PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);
1032:   sf->roffset[0] = 0;
1033:   for (i=0; i<sf->nranks; i++) {
1034:     PetscMPIIntCast(ranks[i],sf->ranks+i);
1035:     sf->roffset[i+1] = sf->roffset[i] + rcount[i];
1036:     rcount[i]        = 0;
1037:   }
1038:   for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
1039:     /* short circuit */
1040:     if (orank != sf->remote[i].rank) {
1041:       /* Search for index of iremote[i].rank in sf->ranks */
1042:       PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);
1043:       if (irank < 0) {
1044:         PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);
1045:         if (irank >= 0) irank += sf->ndranks;
1046:       }
1047:       orank = sf->remote[i].rank;
1048:     }
1050:     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
1051:     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1052:     rcount[irank]++;
1053:   }
1054:   PetscFree2(rcount,ranks);
1055:   return 0;
1056: }

1058: /*@C
1059:    PetscSFGetGroups - gets incoming and outgoing process groups

1061:    Collective

1063:    Input Parameter:
1064: .  sf - star forest

1066:    Output Parameters:
1067: +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
1068: -  outgoing - group of destination processes for outgoing edges (roots that I reference)

1070:    Level: developer

1072: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
1073: @*/
1074: PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
1075: {
1076:   MPI_Group      group = MPI_GROUP_NULL;

1079:   if (sf->ingroup == MPI_GROUP_NULL) {
1080:     PetscInt       i;
1081:     const PetscInt *indegree;
1082:     PetscMPIInt    rank,*outranks,*inranks;
1083:     PetscSFNode    *remote;
1084:     PetscSF        bgcount;

1086:     /* Compute the number of incoming ranks */
1087:     PetscMalloc1(sf->nranks,&remote);
1088:     for (i=0; i<sf->nranks; i++) {
1089:       remote[i].rank  = sf->ranks[i];
1090:       remote[i].index = 0;
1091:     }
1092:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);
1093:     PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1094:     PetscSFComputeDegreeBegin(bgcount,&indegree);
1095:     PetscSFComputeDegreeEnd(bgcount,&indegree);
1096:     /* Enumerate the incoming ranks */
1097:     PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);
1098:     MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1099:     for (i=0; i<sf->nranks; i++) outranks[i] = rank;
1100:     PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);
1101:     PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);
1102:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1103:     MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);
1104:     MPI_Group_free(&group);
1105:     PetscFree2(inranks,outranks);
1106:     PetscSFDestroy(&bgcount);
1107:   }
1108:   *incoming = sf->ingroup;

1110:   if (sf->outgroup == MPI_GROUP_NULL) {
1111:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1112:     MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);
1113:     MPI_Group_free(&group);
1114:   }
1115:   *outgoing = sf->outgroup;
1116:   return 0;
1117: }

1119: /*@
1120:    PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

1122:    Collective

1124:    Input Parameter:
1125: .  sf - star forest that may contain roots with 0 or with more than 1 vertex

1127:    Output Parameter:
1128: .  multi - star forest with split roots, such that each root has degree exactly 1

1130:    Level: developer

1132:    Notes:

1134:    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1135:    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1136:    edge, it is a candidate for future optimization that might involve its removal.

1138: .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1139: @*/
1140: PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
1141: {
1144:   if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
1145:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1146:     *multi = sf->multi;
1147:     sf->multi->multi = sf->multi;
1148:     return 0;
1149:   }
1150:   if (!sf->multi) {
1151:     const PetscInt *indegree;
1152:     PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
1153:     PetscSFNode    *remote;
1154:     maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
1155:     PetscSFComputeDegreeBegin(sf,&indegree);
1156:     PetscSFComputeDegreeEnd(sf,&indegree);
1157:     PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);
1158:     inoffset[0] = 0;
1159:     for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
1160:     for (i=0; i<maxlocal; i++) outones[i] = 1;
1161:     PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1162:     PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1163:     for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1164:     if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
1165:       for (i=0; i<sf->nroots; i++) {
1167:       }
1168:     }
1169:     PetscMalloc1(sf->nleaves,&remote);
1170:     for (i=0; i<sf->nleaves; i++) {
1171:       remote[i].rank  = sf->remote[i].rank;
1172:       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1173:     }
1174:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1175:     sf->multi->multi = sf->multi;
1176:     PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1177:     if (sf->rankorder) {        /* Sort the ranks */
1178:       PetscMPIInt rank;
1179:       PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
1180:       PetscSFNode *newremote;
1181:       MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1182:       for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
1183:       PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);
1184:       for (i=0; i<maxlocal; i++) outranks[i] = rank;
1185:       PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);
1186:       PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);
1187:       /* Sort the incoming ranks at each vertex, build the inverse map */
1188:       for (i=0; i<sf->nroots; i++) {
1189:         PetscInt j;
1190:         for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
1191:         PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);
1192:         for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1193:       }
1194:       PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);
1195:       PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);
1196:       PetscMalloc1(sf->nleaves,&newremote);
1197:       for (i=0; i<sf->nleaves; i++) {
1198:         newremote[i].rank  = sf->remote[i].rank;
1199:         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1200:       }
1201:       PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);
1202:       PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);
1203:     }
1204:     PetscFree3(inoffset,outones,outoffset);
1205:   }
1206:   *multi = sf->multi;
1207:   return 0;
1208: }

1210: /*@C
1211:    PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices

1213:    Collective

1215:    Input Parameters:
1216: +  sf - original star forest
1217: .  nselected  - number of selected roots on this process
1218: -  selected   - indices of the selected roots on this process

1220:    Output Parameter:
1221: .  esf - new star forest

1223:    Level: advanced

1225:    Note:
1226:    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1227:    be done by calling PetscSFGetGraph().

1229: .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1230: @*/
1231: PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1232: {
1233:   PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1234:   const PetscInt    *ilocal;
1235:   signed char       *rootdata,*leafdata,*leafmem;
1236:   const PetscSFNode *iremote;
1237:   PetscSFNode       *new_iremote;
1238:   MPI_Comm          comm;

1241:   PetscSFCheckGraphSet(sf,1);

1245:   PetscSFSetUp(sf);
1246:   PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);
1247:   PetscObjectGetComm((PetscObject)sf,&comm);
1248:   PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);

1250:   if (PetscDefined(USE_DEBUG)) {  /* Error out if selected[] has dups or  out of range indices */
1251:     PetscBool dups;
1254:     for (i=0; i<nselected; i++)
1256:   }

1258:   if (sf->ops->CreateEmbeddedRootSF) {
1259:     (*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf);
1260:   } else {
1261:     /* A generic version of creating embedded sf */
1262:     PetscSFGetLeafRange(sf,&minleaf,&maxleaf);
1263:     maxlocal = maxleaf - minleaf + 1;
1264:     PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);
1265:     leafdata = leafmem - minleaf;
1266:     /* Tag selected roots and bcast to leaves */
1267:     for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1268:     PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);
1269:     PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);

1271:     /* Build esf with leaves that are still connected */
1272:     esf_nleaves = 0;
1273:     for (i=0; i<nleaves; i++) {
1274:       j = ilocal ? ilocal[i] : i;
1275:       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1276:          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1277:       */
1278:       esf_nleaves += (leafdata[j] ? 1 : 0);
1279:     }
1280:     PetscMalloc1(esf_nleaves,&new_ilocal);
1281:     PetscMalloc1(esf_nleaves,&new_iremote);
1282:     for (i=n=0; i<nleaves; i++) {
1283:       j = ilocal ? ilocal[i] : i;
1284:       if (leafdata[j]) {
1285:         new_ilocal[n]        = j;
1286:         new_iremote[n].rank  = iremote[i].rank;
1287:         new_iremote[n].index = iremote[i].index;
1288:         ++n;
1289:       }
1290:     }
1291:     PetscSFCreate(comm,esf);
1292:     PetscSFSetFromOptions(*esf);
1293:     PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1294:     PetscFree2(rootdata,leafmem);
1295:   }
1296:   PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);
1297:   return 0;
1298: }
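
/*
   A sketch that keeps only the edges attached to root 0 on every rank (error checking omitted):

     PetscSF  esf;
     PetscInt selected[1] = {0};

     PetscSFCreateEmbeddedRootSF(sf,1,selected,&esf);
     // esf keeps the original root/leaf numbering; call PetscSFGetGraph(esf,...) to see
     // which leaves are still participating
     PetscSFDestroy(&esf);
*/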

1300: /*@C
1301:   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices

1303:   Collective

1305:   Input Parameters:
1306: + sf - original star forest
1307: . nselected  - number of selected leaves on this process
1308: - selected   - indices of the selected leaves on this process

1310:   Output Parameter:
1311: .  newsf - new star forest

1313:   Level: advanced

1315: .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph()
1316: @*/
1317: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1318: {
1319:   const PetscSFNode *iremote;
1320:   PetscSFNode       *new_iremote;
1321:   const PetscInt    *ilocal;
1322:   PetscInt          i,nroots,*leaves,*new_ilocal;
1323:   MPI_Comm          comm;

1326:   PetscSFCheckGraphSet(sf,1);

1330:   /* Uniq selected[] and put results in leaves[] */
1331:   PetscObjectGetComm((PetscObject)sf,&comm);
1332:   PetscMalloc1(nselected,&leaves);
1333:   PetscArraycpy(leaves,selected,nselected);
1334:   PetscSortedRemoveDupsInt(&nselected,leaves);

1337:   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1338:   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1339:     (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);
1340:   } else {
1341:     PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);
1342:     PetscMalloc1(nselected,&new_ilocal);
1343:     PetscMalloc1(nselected,&new_iremote);
1344:     for (i=0; i<nselected; ++i) {
1345:       const PetscInt l     = leaves[i];
1346:       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1347:       new_iremote[i].rank  = iremote[l].rank;
1348:       new_iremote[i].index = iremote[l].index;
1349:     }
1350:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);
1351:     PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1352:   }
1353:   PetscFree(leaves);
1354:   return 0;
1355: }

1357: /*@C
1358:    PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()

1360:    Collective on PetscSF

1362:    Input Parameters:
1363: +  sf - star forest on which to communicate
1364: .  unit - data type associated with each node
1365: .  rootdata - buffer to broadcast
1366: -  op - operation to use for reduction

1368:    Output Parameter:
1369: .  leafdata - buffer to be reduced with values from each leaf's respective root

1371:    Level: intermediate

1373:    Notes:
1374:     When PETSc is configured with device support, it uses its own mechanism to figure out whether the given data pointers
1375:     are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1376:     use PetscSFBcastWithMemTypeBegin() instead.
1377: .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin()
1378: @*/
1379: PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1380: {
1381:   PetscMemType   rootmtype,leafmtype;

1384:   PetscSFSetUp(sf);
1385:   if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);
1386:   PetscGetMemType(rootdata,&rootmtype);
1387:   PetscGetMemType(leafdata,&leafmtype);
1388:   (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1389:   if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);
1390:   return 0;
1391: }
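
/*
   A sketch of a complete broadcast, assuming rootdata and leafdata are PetscInt arrays sized to
   this rank's roots and leaves (error checking omitted):

     PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);   // each leaf receives its root's value
     // ... independent work may overlap with communication here ...
     PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);
*/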

1393: /*@C
1394:    PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()

1396:    Collective on PetscSF

1398:    Input Parameters:
1399: +  sf - star forest on which to communicate
1400: .  unit - data type associated with each node
1401: .  rootmtype - memory type of rootdata
1402: .  rootdata - buffer to broadcast
1403: .  leafmtype - memory type of leafdata
1404: -  op - operation to use for reduction

1406:    Output Parameter:
1407: .  leafdata - buffer to be reduced with values from each leaf's respective root

1409:    Level: intermediate

1411: .seealso: PetscSFBcastEnd(), PetscSFBcastBegin()
1412: @*/
1413: PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
1414: {
1416:   PetscSFSetUp(sf);
1417:   if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);
1418:   (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1419:   if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);
1420:   return 0;
1421: }

1423: /*@C
1424:    PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()

1426:    Collective

1428:    Input Parameters:
1429: +  sf - star forest
1430: .  unit - data type
1431: .  rootdata - buffer to broadcast
1432: -  op - operation to use for reduction

1434:    Output Parameter:
1435: .  leafdata - buffer to be reduced with values from each leaf's respective root

1437:    Level: intermediate

1439: .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1440: @*/
1441: PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1442: {
1444:   if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);
1445:   (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op);
1446:   if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);
1447:   return 0;
1448: }

1450: /*@C
1451:    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()

1453:    Collective

1455:    Input Parameters:
1456: +  sf - star forest
1457: .  unit - data type
1458: .  leafdata - values to reduce
1459: -  op - reduction operation

1461:    Output Parameter:
1462: .  rootdata - result of reduction of values from all leaves of each root

1464:    Level: intermediate

1466:    Notes:
1467:     When PETSc is configured with device support, it uses its own mechanism to figure out whether the given data pointers
1468:     are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1469:     use PetscSFReduceWithMemTypeBegin() instead.

1471: .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
1472: @*/
1473: PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1474: {
1475:   PetscMemType   rootmtype,leafmtype;

1478:   PetscSFSetUp(sf);
1479:   if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);
1480:   PetscGetMemType(rootdata,&rootmtype);
1481:   PetscGetMemType(leafdata,&leafmtype);
1482:   (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1483:   if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);
1484:   return 0;
1485: }

1487: /*@C
1488:    PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()

1490:    Collective

1492:    Input Parameters:
1493: +  sf - star forest
1494: .  unit - data type
1495: .  leafmtype - memory type of leafdata
1496: .  leafdata - values to reduce
1497: .  rootmtype - memory type of rootdata
1498: -  op - reduction operation

1500:    Output Parameter:
1501: .  rootdata - result of reduction of values from all leaves of each root

1503:    Level: intermediate

1505: .seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
1506: @*/
1507: PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1508: {
1510:   PetscSFSetUp(sf);
1511:   if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);
1512:   (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1513:   if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);
1514:   return 0;
1515: }

1517: /*@C
1518:    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

1520:    Collective

1522:    Input Parameters:
1523: +  sf - star forest
1524: .  unit - data type
1525: .  leafdata - values to reduce
1526: -  op - reduction operation

1528:    Output Parameter:
1529: .  rootdata - result of reduction of values from all leaves of each root

1531:    Level: intermediate

1533: .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1534: @*/
1535: PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1536: {
1538:   if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);
1539:   (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);
1540:   if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);
1541:   return 0;
1542: }
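
/*
   Example (an illustrative sketch, not part of the library source): sum one integer contribution from every leaf
   into its root. "sf" is a hypothetical, already set-up PetscSF with contiguous leaf storage; error checking is
   omitted.

     PetscInt *rootdata,*leafdata,nroots,nleaves,i;

     PetscSFGetGraph(sf,&nroots,&nleaves,NULL,NULL);
     PetscMalloc2(nroots,&rootdata,nleaves,&leafdata);
     for (i=0; i<nroots; i++)  rootdata[i] = 0;                      // MPI_SUM accumulates into rootdata
     for (i=0; i<nleaves; i++) leafdata[i] = 1;                      // each leaf contributes 1
     PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);
     PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);        // rootdata[r] now equals the degree of root r
     PetscFree2(rootdata,leafdata);
*/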

1544: /*@C
1545:    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

1547:    Collective

1549:    Input Parameters:
1550: +  sf - star forest
1551: .  unit - data type
1552: .  leafdata - leaf values to use in reduction
1553: -  op - operation to use for reduction

1555:    Output Parameters:
1556: +  rootdata - root values to be updated; the input state is seen by the first process to perform an update
1557: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1559:    Level: advanced

1561:    Note:
1562:    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1563:    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1564:    not guaranteed across the whole vertex. Therefore, this function is typically used only with primitive types such as
1565:    integers.

1567: .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1568: @*/
1569: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1570: {
1571:   PetscMemType   rootmtype,leafmtype,leafupdatemtype;

1574:   PetscSFSetUp(sf);
1575:   PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1576:   PetscGetMemType(rootdata,&rootmtype);
1577:   PetscGetMemType(leafdata,&leafmtype);
1578:   PetscGetMemType(leafupdate,&leafupdatemtype);
1580:   (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);
1581:   PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1582:   return 0;
1583: }

1585: /*@C
1586:    PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

1588:    Collective

1590:    Input Parameters:
1591: +  sf - star forest
1592: .  unit - data type
1593: .  rootmtype - memory type of rootdata
1594: .  leafmtype - memory type of leafdata
1595: .  leafdata - leaf values to use in reduction
1596: .  leafupdatemtype - memory type of leafupdate
1597: -  op - operation to use for reduction

1599:    Output Parameters:
1600: +  rootdata - root values to be updated; the input state is seen by the first process to perform an update
1601: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1603:    Level: advanced

1605:    Note: See PetscSFFetchAndOpBegin() for more details.

1607: .seealso: PetscSFFetchAndOpBegin(),PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1608: @*/
1609: PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscMemType leafupdatemtype,void *leafupdate,MPI_Op op)
1610: {
1612:   PetscSFSetUp(sf);
1613:   PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1615:   (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);
1616:   PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1617:   return 0;
1618: }

1620: /*@C
1621:    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

1623:    Collective

1625:    Input Parameters:
1626: +  sf - star forest
1627: .  unit - data type
1628: .  leafdata - leaf values to use in reduction
1629: -  op - operation to use for reduction

1631:    Output Parameters:
1632: +  rootdata - root values to be updated; the input state is seen by the first process to perform an update
1633: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1635:    Level: advanced

1637: .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1638: @*/
1639: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1640: {
1642:   PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1643:   (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);
1644:   PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1645:   return 0;
1646: }
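
/*
   Example (an illustrative sketch, not part of the library source): give every leaf a distinct slot at its root
   by atomically fetching and incrementing a per-root counter. "sf" is a hypothetical, already set-up PetscSF
   with contiguous leaf storage; error checking is omitted.

     PetscInt *counter,*ones,*slot,nroots,nleaves,i;

     PetscSFGetGraph(sf,&nroots,&nleaves,NULL,NULL);
     PetscMalloc3(nroots,&counter,nleaves,&ones,nleaves,&slot);
     for (i=0; i<nroots; i++)  counter[i] = 0;
     for (i=0; i<nleaves; i++) ones[i]    = 1;
     PetscSFFetchAndOpBegin(sf,MPIU_INT,counter,ones,slot,MPI_SUM);
     PetscSFFetchAndOpEnd(sf,MPIU_INT,counter,ones,slot,MPI_SUM);
     // slot[i] holds the counter value at leaf i's root just before this leaf's update;
     // counter[r] ends up equal to the degree of root r
     PetscFree3(counter,ones,slot);
*/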

1648: /*@C
1649:    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

1651:    Collective

1653:    Input Parameter:
1654: .  sf - star forest

1656:    Output Parameter:
1657: .  degree - degree of each root vertex

1659:    Level: advanced

1661:    Notes:
1662:    The returned array is owned by the PetscSF and automatically freed by PetscSFDestroy(); there is no need to call PetscFree() on it.

1664: .seealso: PetscSFGatherBegin()
1665: @*/
1666: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1667: {
1669:   PetscSFCheckGraphSet(sf,1);
1671:   if (!sf->degreeknown) {
1672:     PetscInt i, nroots = sf->nroots, maxlocal;
1674:     maxlocal = sf->maxleaf-sf->minleaf+1;
1675:     PetscMalloc1(nroots,&sf->degree);
1676:     PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1677:     for (i=0; i<nroots; i++) sf->degree[i] = 0;
1678:     for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1679:     PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1680:   }
1681:   *degree = NULL;
1682:   return 0;
1683: }

1685: /*@C
1686:    PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

1688:    Collective

1690:    Input Parameter:
1691: .  sf - star forest

1693:    Output Parameter:
1694: .  degree - degree of each root vertex

1696:    Level: developer

1698:    Notes:
1699:    The returned array is owned by the PetscSF and automatically freed by PetscSFDestroy(); there is no need to call PetscFree() on it.

1701: .seealso: PetscSFComputeDegreeBegin()
1702: @*/
1703: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
1704: {
1706:   PetscSFCheckGraphSet(sf,1);
1708:   if (!sf->degreeknown) {
1710:     PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1711:     PetscFree(sf->degreetmp);
1712:     sf->degreeknown = PETSC_TRUE;
1713:   }
1714:   *degree = sf->degree;
1715:   return 0;
1716: }
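
/*
   Example (an illustrative sketch, not part of the library source): query how many leaves point at each root of
   a hypothetical, already set-up PetscSF "sf". The degree array is owned by the SF and must not be freed.

     const PetscInt *degree;
     PetscInt        nroots,nslots = 0,i;

     PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
     PetscSFComputeDegreeBegin(sf,&degree);
     PetscSFComputeDegreeEnd(sf,&degree);
     for (i=0; i<nroots; i++) nslots += degree[i];   // total multi-root space needed by PetscSFGatherBegin()
*/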

1718: /*@C
1719:    PetscSFComputeMultiRootOriginalNumbering - Returns the original numbering of multi-roots (roots of the multi-SF returned by PetscSFGetMultiSF()).
1720:    Each multi-root is assigned the index of the corresponding original root.

1722:    Collective

1724:    Input Parameters:
1725: +  sf - star forest
1726: -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

1728:    Output Parameters:
1729: +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1730: -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

1732:    Level: developer

1734:    Notes:
1735:    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

1737: .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1738: @*/
1739: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1740: {
1741:   PetscSF             msf;
1742:   PetscInt            i, j, k, nroots, nmroots;

1745:   PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
1749:   PetscSFGetMultiSF(sf,&msf);
1750:   PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);
1751:   PetscMalloc1(nmroots, multiRootsOrigNumbering);
1752:   for (i=0,j=0,k=0; i<nroots; i++) {
1753:     if (!degree[i]) continue;
1754:     for (j=0; j<degree[i]; j++,k++) {
1755:       (*multiRootsOrigNumbering)[k] = i;
1756:     }
1757:   }
1759:   if (nMultiRoots) *nMultiRoots = nmroots;
1760:   return 0;
1761: }
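
/*
   Example (an illustrative sketch, not part of the library source): map each multi-root back to the original
   root it belongs to, for a hypothetical, already set-up PetscSF "sf". Unlike the degree array, the output
   array is owned by the caller and must be freed.

     const PetscInt *degree;
     PetscInt       *orig,nmroots;

     PetscSFComputeDegreeBegin(sf,&degree);
     PetscSFComputeDegreeEnd(sf,&degree);
     PetscSFComputeMultiRootOriginalNumbering(sf,degree,&nmroots,&orig);
     // orig[m] is the original root index of multi-root m, for 0 <= m < nmroots
     PetscFree(orig);
*/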

1763: /*@C
1764:    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

1766:    Collective

1768:    Input Parameters:
1769: +  sf - star forest
1770: .  unit - data type
1771: -  leafdata - leaf data to gather to roots

1773:    Output Parameter:
1774: .  multirootdata - root buffer to gather into; the amount of space per root equals its degree

1776:    Level: intermediate

1778: .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1779: @*/
1780: PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1781: {
1782:   PetscSF        multi = NULL;

1785:   PetscSFSetUp(sf);
1786:   PetscSFGetMultiSF(sf,&multi);
1787:   PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);
1788:   return 0;
1789: }

1791: /*@C
1792:    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

1794:    Collective

1796:    Input Parameters:
1797: +  sf - star forest
1798: .  unit - data type
1799: -  leafdata - leaf data to gather to roots

1801:    Output Parameter:
1802: .  multirootdata - root buffer to gather into; the amount of space per root equals its degree

1804:    Level: intermediate

1806: .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1807: @*/
1808: PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1809: {
1810:   PetscSF        multi = NULL;

1813:   PetscSFGetMultiSF(sf,&multi);
1814:   PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);
1815:   return 0;
1816: }
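
/*
   Example (an illustrative sketch, not part of the library source): gather one integer from every leaf into
   per-root buckets of a hypothetical, already set-up PetscSF "sf" with contiguous leaf storage; error checking
   is omitted.

     const PetscInt *degree;
     PetscInt       *leafdata,*multirootdata,nroots,nleaves,nslots = 0,i;

     PetscSFGetGraph(sf,&nroots,&nleaves,NULL,NULL);
     PetscSFComputeDegreeBegin(sf,&degree);
     PetscSFComputeDegreeEnd(sf,&degree);
     for (i=0; i<nroots; i++) nslots += degree[i];                   // one slot per incoming leaf
     PetscMalloc2(nleaves,&leafdata,nslots,&multirootdata);
     for (i=0; i<nleaves; i++) leafdata[i] = i;
     PetscSFGatherBegin(sf,MPIU_INT,leafdata,multirootdata);
     PetscSFGatherEnd(sf,MPIU_INT,leafdata,multirootdata);
     PetscFree2(leafdata,multirootdata);
*/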

1818: /*@C
1819:    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

1821:    Collective

1823:    Input Parameters:
1824: +  sf - star forest
1825: .  unit - data type
1826: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1828:    Output Parameter:
1829: .  leafdata - leaf data to be updated with personal data from each respective root

1831:    Level: intermediate

1833: .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd(), PetscSFGatherBegin()
1834: @*/
1835: PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1836: {
1837:   PetscSF        multi = NULL;

1840:   PetscSFSetUp(sf);
1841:   PetscSFGetMultiSF(sf,&multi);
1842:   PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE);
1843:   return 0;
1844: }

1846: /*@C
1847:    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

1849:    Collective

1851:    Input Parameters:
1852: +  sf - star forest
1853: .  unit - data type
1854: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1856:    Output Parameter:
1857: .  leafdata - leaf data to be updated with personal data from each respective root

1859:    Level: intermediate

1861: .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterBegin(), PetscSFGatherEnd()
1862: @*/
1863: PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1864: {
1865:   PetscSF        multi = NULL;

1868:   PetscSFGetMultiSF(sf,&multi);
1869:   PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE);
1870:   return 0;
1871: }
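
/*
   Example (an illustrative sketch, not part of the library source): the reverse of the gather sketch above,
   sending one personal value from each multi-root slot back to its leaf. "sf", "multirootdata" and "leafdata"
   are the same hypothetical objects and buffers as in that sketch.

     PetscSFScatterBegin(sf,MPIU_INT,multirootdata,leafdata);
     PetscSFScatterEnd(sf,MPIU_INT,multirootdata,leafdata);
*/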

1873: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1874: {
1875:   PetscInt        i, n, nleaves;
1876:   const PetscInt *ilocal = NULL;
1877:   PetscHSetI      seen;

1879:   if (PetscDefined(USE_DEBUG)) {
1880:     PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);
1881:     PetscHSetICreate(&seen);
1882:     for (i = 0; i < nleaves; i++) {
1883:       const PetscInt leaf = ilocal ? ilocal[i] : i;
1884:       PetscHSetIAdd(seen,leaf);
1885:     }
1886:     PetscHSetIGetSize(seen,&n);
1888:     PetscHSetIDestroy(&seen);
1889:   }
1890:   return 0;
1891: }

1893: /*@
1894:   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

1896:   Input Parameters:
1897: + sfA - The first PetscSF
1898: - sfB - The second PetscSF

1900:   Output Parameters:
1901: . sfBA - The composite SF

1903:   Level: developer

1905:   Notes:
1906:   Currently, the two SFs must be defined on congruent communicators and they must be true star
1907:   forests, i.e., the same leaf is not connected to different roots.

1909:   sfA's leaf space and sfB's root space may partially overlap. The composition builds
1910:   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
1911:   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1912:   Bcast on sfA, then a Bcast on sfB, on connected nodes.

1914: .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
1915: @*/
1916: PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1917: {
1918:   const PetscSFNode *remotePointsA,*remotePointsB;
1919:   PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
1920:   const PetscInt    *localPointsA,*localPointsB;
1921:   PetscInt          *localPointsBA;
1922:   PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
1923:   PetscBool         denseB;

1926:   PetscSFCheckGraphSet(sfA,1);
1928:   PetscSFCheckGraphSet(sfB,2);
1931:   PetscSFCheckLeavesUnique_Private(sfA);
1932:   PetscSFCheckLeavesUnique_Private(sfB);

1934:   PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);
1935:   PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);
1936:   if (localPointsA) {
1937:     PetscMalloc1(numRootsB,&reorderedRemotePointsA);
1938:     for (i=0; i<numRootsB; i++) {
1939:       reorderedRemotePointsA[i].rank = -1;
1940:       reorderedRemotePointsA[i].index = -1;
1941:     }
1942:     for (i=0; i<numLeavesA; i++) {
1943:       if (localPointsA[i] >= numRootsB) continue;
1944:       reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
1945:     }
1946:     remotePointsA = reorderedRemotePointsA;
1947:   }
1948:   PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);
1949:   PetscMalloc1(maxleaf-minleaf+1,&leafdataB);
1950:   PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);
1951:   PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);
1952:   PetscFree(reorderedRemotePointsA);

1954:   denseB = (PetscBool)!localPointsB;
1955:   for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1956:     if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
1957:     else numLeavesBA++;
1958:   }
1959:   if (denseB) {
1960:     localPointsBA  = NULL;
1961:     remotePointsBA = leafdataB;
1962:   } else {
1963:     PetscMalloc1(numLeavesBA,&localPointsBA);
1964:     PetscMalloc1(numLeavesBA,&remotePointsBA);
1965:     for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1966:       const PetscInt l = localPointsB ? localPointsB[i] : i;

1968:       if (leafdataB[l-minleaf].rank == -1) continue;
1969:       remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
1970:       localPointsBA[numLeavesBA] = l;
1971:       numLeavesBA++;
1972:     }
1973:     PetscFree(leafdataB);
1974:   }
1975:   PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
1976:   PetscSFSetFromOptions(*sfBA);
1977:   PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
1978:   return 0;
1979: }
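
/*
   Example (an illustrative sketch, not part of the library source): chain two hypothetical, already set-up star
   forests so that one broadcast on the result behaves like a broadcast through sfA followed by a broadcast
   through sfB; "rootdataA" is a hypothetical buffer over sfA's roots and "leafdataB" a buffer over sfB's leaves.

     PetscSF sfBA;

     PetscSFCompose(sfA,sfB,&sfBA);
     PetscSFBcastBegin(sfBA,MPIU_INT,rootdataA,leafdataB,MPI_REPLACE);
     PetscSFBcastEnd(sfBA,MPIU_INT,rootdataA,leafdataB,MPI_REPLACE);
     PetscSFDestroy(&sfBA);
*/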

1981: /*@
1982:   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

1984:   Input Parameters:
1985: + sfA - The first PetscSF
1986: - sfB - The second PetscSF

1988:   Output Parameters:
1989: . sfBA - The composite SF.

1991:   Level: developer

1993:   Notes:
1994:   Currently, the two SFs must be defined on congruent communicators and they must be true star
1995:   forests, i.e., the same leaf is not connected to different roots. Moreover, all roots of the
1996:   second SF must have a degree of 1, i.e., no root has more than one leaf connected to it.

1998:   sfA's leaf space and sfB's leaf space may partially overlap. The composition builds
1999:   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
2000:   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
2001:   a Reduce on sfB, on connected roots.

2003: .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
2004: @*/
2005: PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
2006: {
2007:   const PetscSFNode *remotePointsA,*remotePointsB;
2008:   PetscSFNode       *remotePointsBA;
2009:   const PetscInt    *localPointsA,*localPointsB;
2010:   PetscSFNode       *reorderedRemotePointsA = NULL;
2011:   PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
2012:   MPI_Op            op;
2013: #if defined(PETSC_USE_64BIT_INDICES)
2014:   PetscBool         iswin;
2015: #endif

2018:   PetscSFCheckGraphSet(sfA,1);
2020:   PetscSFCheckGraphSet(sfB,2);
2023:   PetscSFCheckLeavesUnique_Private(sfA);
2024:   PetscSFCheckLeavesUnique_Private(sfB);

2026:   PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);
2027:   PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);

2029:   /* TODO: Check roots of sfB have degree of 1 */
2030:   /* Once that check is implemented, we can replace MPI_MAXLOC
2031:      with MPI_REPLACE; when every root of sfB has degree 1, the two operations have the same effect.
2032:      We use MPI_MAXLOC only to obtain a deterministic output from this routine if
2033:      the root condition is not met.
2034:    */
2035:   op = MPI_MAXLOC;
2036: #if defined(PETSC_USE_64BIT_INDICES)
2037:   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2038:   PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);
2039:   if (iswin) op = MPI_REPLACE;
2040: #endif

2042:   PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);
2043:   PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);
2044:   for (i=0; i<maxleaf - minleaf + 1; i++) {
2045:     reorderedRemotePointsA[i].rank = -1;
2046:     reorderedRemotePointsA[i].index = -1;
2047:   }
2048:   if (localPointsA) {
2049:     for (i=0; i<numLeavesA; i++) {
2050:       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2051:       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2052:     }
2053:   } else {
2054:     for (i=0; i<numLeavesA; i++) {
2055:       if (i > maxleaf || i < minleaf) continue;
2056:       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2057:     }
2058:   }

2060:   PetscMalloc1(numRootsB,&localPointsBA);
2061:   PetscMalloc1(numRootsB,&remotePointsBA);
2062:   for (i=0; i<numRootsB; i++) {
2063:     remotePointsBA[i].rank = -1;
2064:     remotePointsBA[i].index = -1;
2065:   }

2067:   PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
2068:   PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
2069:   PetscFree(reorderedRemotePointsA);
2070:   for (i=0,numLeavesBA=0; i<numRootsB; i++) {
2071:     if (remotePointsBA[i].rank == -1) continue;
2072:     remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
2073:     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2074:     localPointsBA[numLeavesBA] = i;
2075:     numLeavesBA++;
2076:   }
2077:   PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
2078:   PetscSFSetFromOptions(*sfBA);
2079:   PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
2080:   return 0;
2081: }
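
/*
   Example (an illustrative sketch, not part of the library source): compose a hypothetical sfA with the inverse
   of a hypothetical sfB that satisfies the degree-1 requirement above; broadcasting on the result corresponds to
   a broadcast on sfA followed by a reduce on sfB, on connected roots.

     PetscSF sfBA;

     PetscSFComposeInverse(sfA,sfB,&sfBA);
     // ... use sfBA with PetscSFBcastBegin()/PetscSFBcastEnd() ...
     PetscSFDestroy(&sfBA);
*/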

2083: /*
2084:   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

2086:   Input Parameters:
2087: . sf - The global PetscSF

2089:   Output Parameters:
2090: . out - The local PetscSF
2091:  */
2092: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
2093: {
2094:   MPI_Comm           comm;
2095:   PetscMPIInt        myrank;
2096:   const PetscInt     *ilocal;
2097:   const PetscSFNode  *iremote;
2098:   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
2099:   PetscSFNode        *liremote;
2100:   PetscSF            lsf;

2103:   if (sf->ops->CreateLocalSF) {
2104:     (*sf->ops->CreateLocalSF)(sf,out);
2105:   } else {
2106:     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2107:     PetscObjectGetComm((PetscObject)sf,&comm);
2108:     MPI_Comm_rank(comm,&myrank);

2110:     /* Find out local edges and build a local SF */
2111:     PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
2112:     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2113:     PetscMalloc1(lnleaves,&lilocal);
2114:     PetscMalloc1(lnleaves,&liremote);

2116:     for (i=j=0; i<nleaves; i++) {
2117:       if (iremote[i].rank == (PetscInt)myrank) {
2118:         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2119:         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
2120:         liremote[j].index = iremote[i].index;
2121:         j++;
2122:       }
2123:     }
2124:     PetscSFCreate(PETSC_COMM_SELF,&lsf);
2125:     PetscSFSetFromOptions(lsf);
2126:     PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);
2127:     PetscSFSetUp(lsf);
2128:     *out = lsf;
2129:   }
2130:   return 0;
2131: }

2133: /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2134: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2135: {
2136:   PetscMemType       rootmtype,leafmtype;

2139:   PetscSFSetUp(sf);
2140:   PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);
2141:   PetscGetMemType(rootdata,&rootmtype);
2142:   PetscGetMemType(leafdata,&leafmtype);
2143:   if (sf->ops->BcastToZero) {
2144:     (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);
2145:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2146:   PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);
2147:   return 0;
2148: }

2150: /*@
2151:   PetscSFConcatenate - concatenate multiple SFs into one

2153:   Input Parameters:
2154: + comm - the communicator
2155: . nsfs - the number of input PetscSFs
2156: . sfs  - the array of input PetscSFs
2157: . shareRoots - flag indicating whether the roots of the input PetscSFs are taken as shared (PETSC_TRUE) or as separate and concatenated (PETSC_FALSE)
2158: - leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage

2160:   Output Parameters:
2161: . newsf - The resulting PetscSF

2163:   Level: developer

2165:   Notes:
2166:   The communicator of all SFs in sfs must be comm.

2168:   The offsets in leafOffsets are added to the original leaf indices.

2170:   If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
2171:   In this case, NULL is also passed as ilocal to the resulting SF.

2173:   If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
2174:   In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs).

2176: .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph()
2177: @*/
2178: PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf)
2179: {
2180:   PetscInt            i, s, nLeaves, nRoots;
2181:   PetscInt           *leafArrayOffsets;
2182:   PetscInt           *ilocal_new;
2183:   PetscSFNode        *iremote_new;
2184:   PetscInt           *rootOffsets;
2185:   PetscBool           all_ilocal_null = PETSC_FALSE;
2186:   PetscMPIInt         rank;

2188:   {
2189:     PetscSF dummy; /* just to have a PetscObject on comm for input validation */

2191:     PetscSFCreate(comm,&dummy);
2194:     for (i=0; i<nsfs; i++) {
2197:     }
2201:     PetscSFDestroy(&dummy);
2202:   }
2203:   if (!nsfs) {
2204:     PetscSFCreate(comm, newsf);
2205:     PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER);
2206:     return 0;
2207:   }
2208:   MPI_Comm_rank(comm, &rank);

2210:   PetscCalloc1(nsfs+1, &rootOffsets);
2211:   if (shareRoots) {
2212:     PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL);
2213:     if (PetscDefined(USE_DEBUG)) {
2214:       for (s=1; s<nsfs; s++) {
2215:         PetscInt nr;

2217:         PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL);
2219:       }
2220:     }
2221:   } else {
2222:     for (s=0; s<nsfs; s++) {
2223:       PetscInt nr;

2225:       PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL);
2226:       rootOffsets[s+1] = rootOffsets[s] + nr;
2227:     }
2228:     nRoots = rootOffsets[nsfs];
2229:   }

2231:   /* Calculate leaf array offsets */
2232:   PetscMalloc1(nsfs+1,&leafArrayOffsets);
2233:   leafArrayOffsets[0] = 0;
2234:   for (s=0; s<nsfs; s++) {
2235:     PetscInt        nl;

2237:     PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL);
2238:     leafArrayOffsets[s+1] = leafArrayOffsets[s] + nl;
2239:   }
2240:   nLeaves = leafArrayOffsets[nsfs];

2242:   if (!leafOffsets) {
2243:     all_ilocal_null = PETSC_TRUE;
2244:     for (s=0; s<nsfs; s++) {
2245:       const PetscInt *ilocal;

2247:       PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL);
2248:       if (ilocal) {
2249:         all_ilocal_null = PETSC_FALSE;
2250:         break;
2251:       }
2252:     }
2254:   }

2256:   /* Renumber and concatenate local leaves */
2257:   ilocal_new = NULL;
2258:   if (!all_ilocal_null) {
2259:     PetscMalloc1(nLeaves, &ilocal_new);
2260:     for (i = 0; i<nLeaves; i++) ilocal_new[i] = -1;
2261:     for (s = 0; s<nsfs; s++) {
2262:       const PetscInt   *ilocal;
2263:       PetscInt         *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
2264:       PetscInt          i, nleaves_l;

2266:       PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL);
2267:       for (i=0; i<nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2268:     }
2269:   }

2271:   /* Renumber and concatenate remote roots */
2272:   PetscMalloc1(nLeaves, &iremote_new);
2273:   for (i = 0; i < nLeaves; i++) {
2274:     iremote_new[i].rank   = -1;
2275:     iremote_new[i].index  = -1;
2276:   }
2277:   for (s = 0; s<nsfs; s++) {
2278:     PetscInt            i, nl, nr;
2279:     PetscSF             tmp_sf;
2280:     const PetscSFNode  *iremote;
2281:     PetscSFNode        *tmp_rootdata;
2282:     PetscSFNode        *tmp_leafdata = &iremote_new[leafArrayOffsets[s]];

2284:     PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote);
2285:     PetscSFCreate(comm, &tmp_sf);
2286:     /* create helper SF with contiguous leaves */
2287:     PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode*) iremote, PETSC_COPY_VALUES);
2288:     PetscSFSetUp(tmp_sf);
2289:     PetscMalloc1(nr, &tmp_rootdata);
2290:     for (i = 0; i < nr; i++) {
2291:       tmp_rootdata[i].index = i + rootOffsets[s];
2292:       tmp_rootdata[i].rank  = (PetscInt) rank;
2293:     }
2294:     PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE);
2295:     PetscSFBcastEnd(  tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE);
2296:     PetscSFDestroy(&tmp_sf);
2297:     PetscFree(tmp_rootdata);
2298:   }

2300:   /* Build the new SF */
2301:   PetscSFCreate(comm, newsf);
2302:   PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER);
2303:   PetscSFSetUp(*newsf);
2304:   PetscFree(rootOffsets);
2305:   PetscFree(leafArrayOffsets);
2306:   return 0;
2307: }
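
/*
   Example (an illustrative sketch, not part of the library source): stack two hypothetical, already set-up star
   forests sf0 and sf1 living on the same communicator into a single SF, keeping their root spaces separate.
   Both are assumed to use contiguous leaf storage, so NULL leaf offsets are allowed; error checking is omitted.

     PetscSF sfs[2],newsf;

     sfs[0] = sf0;
     sfs[1] = sf1;
     PetscSFConcatenate(PetscObjectComm((PetscObject)sf0),2,sfs,PETSC_FALSE,NULL,&newsf);
     // newsf has nroots(sf0)+nroots(sf1) roots and nleaves(sf0)+nleaves(sf1) leaves
     PetscSFDestroy(&newsf);
*/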