Actual source code: sf.c

  1: #include <petsc/private/sfimpl.h>
  2: #include <petsc/private/hashseti.h>
  3: #include <petscctable.h>

  5: #if defined(PETSC_HAVE_CUDA)
  6:   #include <cuda_runtime.h>
  7: #endif

  9: #if defined(PETSC_HAVE_HIP)
 10:   #include <hip/hip_runtime.h>
 11: #endif

 13: #if defined(PETSC_USE_DEBUG)
 14: #  define PetscSFCheckGraphSet(sf,arg) do {                          \
 15:     if (PetscUnlikely(!(sf)->graphset))                              \
 16:       SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
 17:   } while (0)
 18: #else
 19: #  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
 20: #endif

 22: const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};

 24: PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type)
 25: {
 28:   *type = PETSC_MEMTYPE_HOST;
 29: #if defined(PETSC_HAVE_CUDA)
 30:   if (PetscCUDAInitialized && data) {
 31:     cudaError_t                  cerr;
 32:     struct cudaPointerAttributes attr;
 33:     enum cudaMemoryType          mtype;
 34:     cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */
 35:     cudaGetLastError(); /* Reset the last error */
 36:     #if (CUDART_VERSION < 10000)
 37:       mtype = attr.memoryType;
 38:     #else
 39:       mtype = attr.type;
 40:     #endif
 41:     if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
 42:   }
 43: #endif

 45: #if defined(PETSC_HAVE_HIP)
 46:   if (PetscHIPInitialized && data) {
 47:     hipError_t                   cerr;
 48:     struct hipPointerAttribute_t attr;
 49:     enum hipMemoryType           mtype;
 50:     cerr = hipPointerGetAttributes(&attr,data);
 51:     hipGetLastError(); /* Reset the last error */
 52:     mtype = attr.memoryType;
 53:     if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
 54:   }
 55: #endif
 56:   return(0);
 57: }

 59: /*@
 60:    PetscSFCreate - create a star forest communication context

 62:    Collective

 64:    Input Arguments:
 65: .  comm - communicator on which the star forest will operate

 67:    Output Arguments:
 68: .  sf - new star forest context

 70:    Options Database Keys:
 71: +  -sf_type basic     - Use MPI persistent Isend/Irecv for communication (Default)
 72: .  -sf_type window    - Use MPI-3 one-sided window for communication
 73: -  -sf_type neighbor  - Use MPI-3 neighborhood collectives for communication

 75:    Level: intermediate

 77:    Notes:
 78:    When one knows the communication graph is one of the predefined graphs, such as MPI_Alltoall, MPI_Allgatherv,
 79:    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
 80:    SFs are optimized and have better performance than general SFs.

 82: .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
 83: @*/
 84: PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
 85: {
 87:   PetscSF        b;

 91:   PetscSFInitializePackage();

 93:   PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);

 95:   b->nroots    = -1;
 96:   b->nleaves   = -1;
 97:   b->minleaf   = PETSC_MAX_INT;
 98:   b->maxleaf   = PETSC_MIN_INT;
 99:   b->nranks    = -1;
100:   b->rankorder = PETSC_TRUE;
101:   b->ingroup   = MPI_GROUP_NULL;
102:   b->outgroup  = MPI_GROUP_NULL;
103:   b->graphset  = PETSC_FALSE;
104: #if defined(PETSC_HAVE_DEVICE)
105:   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
106:   b->use_stream_aware_mpi = PETSC_FALSE;
107:   b->unknown_input_stream = PETSC_FALSE;
108:   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
109:     b->backend = PETSCSF_BACKEND_KOKKOS;
110:   #elif defined(PETSC_HAVE_CUDA)
111:     b->backend = PETSCSF_BACKEND_CUDA;
112:   #elif defined(PETSC_HAVE_HIP)
113:     b->backend = PETSCSF_BACKEND_HIP;
114:   #endif

116:   #if defined(PETSC_HAVE_NVSHMEM)
117:     b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
118:     b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
119:     PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL);
120:     PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL);
121:   #endif
122: #endif
123:   b->vscat.from_n = -1;
124:   b->vscat.to_n   = -1;
125:   b->vscat.unit   = MPIU_SCALAR;
126:   *sf = b;
127:   return(0);
128: }
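/*
   Editor's sketch (not part of sf.c): the minimal lifecycle of a PetscSF as described in the
   man page above. The helper name is hypothetical and the block is guarded out since it is
   purely illustrative.
*/
#if 0
static PetscErrorCode ExampleCreateSF(MPI_Comm comm)
{
  PetscErrorCode ierr;
  PetscSF        sf;

  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);       /* new star forest on comm */
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);     /* honor -sf_type, -sf_backend, ... */
  /* ... describe the graph with PetscSFSetGraph() or PetscSFSetGraphWithPattern(), then PetscSFSetUp() ... */
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  return 0;
}
#endif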

130: /*@
131:    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

133:    Collective

135:    Input Arguments:
136: .  sf - star forest

138:    Level: advanced

140: .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
141: @*/
142: PetscErrorCode PetscSFReset(PetscSF sf)
143: {

148:   if (sf->ops->Reset) {(*sf->ops->Reset)(sf);}
149:   sf->nroots   = -1;
150:   sf->nleaves  = -1;
151:   sf->minleaf  = PETSC_MAX_INT;
152:   sf->maxleaf  = PETSC_MIN_INT;
153:   sf->mine     = NULL;
154:   sf->remote   = NULL;
155:   sf->graphset = PETSC_FALSE;
156:   PetscFree(sf->mine_alloc);
157:   PetscFree(sf->remote_alloc);
158:   sf->nranks = -1;
159:   PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);
160:   sf->degreeknown = PETSC_FALSE;
161:   PetscFree(sf->degree);
162:   if (sf->ingroup  != MPI_GROUP_NULL) {MPI_Group_free(&sf->ingroup);}
163:   if (sf->outgroup != MPI_GROUP_NULL) {MPI_Group_free(&sf->outgroup);}
164:   if (sf->multi) sf->multi->multi = NULL;
165:   PetscSFDestroy(&sf->multi);
166:   PetscLayoutDestroy(&sf->map);

168:  #if defined(PETSC_HAVE_DEVICE)
169:   for (PetscInt i=0; i<2; i++) {PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);}
170:  #endif

172:   sf->setupcalled = PETSC_FALSE;
173:   return(0);
174: }

176: /*@C
177:    PetscSFSetType - Set the PetscSF communication implementation

179:    Collective on PetscSF

181:    Input Parameters:
182: +  sf - the PetscSF context
183: -  type - a known method

185:    Options Database Key:
186: .  -sf_type <type> - Sets the method; use -help for a list
187:    of available methods (for instance, window, basic, neighbor)

189:    Notes:
190:    See "include/petscsf.h" for available methods (for instance)
191: +    PETSCSFWINDOW - MPI-2/3 one-sided
192: -    PETSCSFBASIC - basic implementation using MPI-1 two-sided

194:   Level: intermediate

196: .seealso: PetscSFType, PetscSFCreate()
197: @*/
198: PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
199: {
200:   PetscErrorCode ierr,(*r)(PetscSF);
201:   PetscBool      match;


207:   PetscObjectTypeCompare((PetscObject)sf,type,&match);
208:   if (match) return(0);

210:   PetscFunctionListFind(PetscSFList,type,&r);
211:   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
212:   /* Destroy the previous PetscSF implementation context */
213:   if (sf->ops->Destroy) {(*(sf)->ops->Destroy)(sf);}
214:   PetscMemzero(sf->ops,sizeof(*sf->ops));
215:   PetscObjectChangeTypeName((PetscObject)sf,type);
216:   (*r)(sf);
217:   return(0);
218: }

220: /*@C
221:   PetscSFGetType - Get the PetscSF communication implementation

223:   Not Collective

225:   Input Parameter:
226: . sf  - the PetscSF context

228:   Output Parameter:
229: . type - the PetscSF type name

231:   Level: intermediate

233: .seealso: PetscSFSetType(), PetscSFCreate()
234: @*/
235: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
236: {
240:   *type = ((PetscObject)sf)->type_name;
241:   return(0);
242: }

244: /*@C
245:    PetscSFDestroy - destroy star forest

247:    Collective

249:    Input Arguments:
250: .  sf - address of star forest

252:    Level: intermediate

254: .seealso: PetscSFCreate(), PetscSFReset()
255: @*/
256: PetscErrorCode PetscSFDestroy(PetscSF *sf)
257: {

261:   if (!*sf) return(0);
263:   if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; return(0);}
264:   PetscSFReset(*sf);
265:   if ((*sf)->ops->Destroy) {(*(*sf)->ops->Destroy)(*sf);}
266:   PetscSFDestroy(&(*sf)->vscat.lsf);
267:   if ((*sf)->vscat.bs > 1) {MPI_Type_free(&(*sf)->vscat.unit);}
268:   PetscHeaderDestroy(sf);
269:   return(0);
270: }

272: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
273: {
274:   PetscInt           i, nleaves;
275:   PetscMPIInt        size;
276:   const PetscInt    *ilocal;
277:   const PetscSFNode *iremote;
278:   PetscErrorCode     ierr;

281:   if (!sf->graphset || !PetscDefined(USE_DEBUG)) return(0);
282:   PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);
283:   MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
284:   for (i = 0; i < nleaves; i++) {
285:     const PetscInt rank = iremote[i].rank;
286:     const PetscInt remote = iremote[i].index;
287:     const PetscInt leaf = ilocal ? ilocal[i] : i;
288:     if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
289:     if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
290:     if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
291:   }
292:   return(0);
293: }

295: /*@
296:    PetscSFSetUp - set up communication structures

298:    Collective

300:    Input Arguments:
301: .  sf - star forest communication object

303:    Level: beginner

305: .seealso: PetscSFSetFromOptions(), PetscSFSetType()
306: @*/
307: PetscErrorCode PetscSFSetUp(PetscSF sf)
308: {

313:   PetscSFCheckGraphSet(sf,1);
314:   if (sf->setupcalled) return(0);
315:   PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);
316:   PetscSFCheckGraphValid_Private(sf);
317:   if (!((PetscObject)sf)->type_name) {PetscSFSetType(sf,PETSCSFBASIC);} /* Zero all sf->ops */
318:   if (sf->ops->SetUp) {(*sf->ops->SetUp)(sf);}
319: #if defined(PETSC_HAVE_CUDA)
320:   if (sf->backend == PETSCSF_BACKEND_CUDA) {
321:     sf->ops->Malloc = PetscSFMalloc_CUDA;
322:     sf->ops->Free   = PetscSFFree_CUDA;
323:   }
324: #endif
325: #if defined(PETSC_HAVE_HIP)
326:   if (sf->backend == PETSCSF_BACKEND_HIP) {
327:     sf->ops->Malloc = PetscSFMalloc_HIP;
328:     sf->ops->Free   = PetscSFFree_HIP;
329:   }
330: #endif

333: #if defined(PETSC_HAVE_KOKKOS)
334:   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
335:     sf->ops->Malloc = PetscSFMalloc_Kokkos;
336:     sf->ops->Free   = PetscSFFree_Kokkos;
337:   }
338: #endif
339:   PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);
340:   sf->setupcalled = PETSC_TRUE;
341:   return(0);
342: }

344: /*@
345:    PetscSFSetFromOptions - set PetscSF options using the options database

347:    Logically Collective

349:    Input Arguments:
350: .  sf - star forest

352:    Options Database Keys:
353: +  -sf_type               - implementation type, see PetscSFSetType()
 354: .  -sf_rank_order         - sort composite points for gathers and scatters in rank order; gathers are non-deterministic otherwise
 355: .  -sf_unknown_input_stream - Assume the input root/leafdata passed to SF routines is computed on arbitrary streams unknown to SF,
 356:                             so SF cannot assume the data is ready on its own stream and must synchronize accordingly (default: false).
 357:                             This option is only meaningful when PETSc is configured with device support.
 358: .  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
 359:                                If true, this option only works with -use_gpu_aware_mpi 1.

 361: -  -sf_backend cuda | hip | kokkos - Select the device backend SF uses. Currently SF has these backends: cuda, hip and kokkos.
 362:                               On CUDA (HIP) devices, one can choose cuda (hip) or kokkos, with kokkos being the default when PETSc is built with Kokkos. On other devices,
 363:                               the only available backend is kokkos.

365:    Level: intermediate
366: @*/
367: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
368: {
369:   PetscSFType    deft;
370:   char           type[256];
372:   PetscBool      flg;

376:   PetscObjectOptionsBegin((PetscObject)sf);
377:   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
378:   PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);
379:   PetscSFSetType(sf,flg ? type : deft);
380:   PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);
381:  #if defined(PETSC_HAVE_DEVICE)
382:   {
383:     char        backendstr[32] = {0};
384:     PetscBool   isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
385:     /* Change the defaults set in PetscSFCreate() with command line options */
 386:     PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitrary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL);
387:     PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);
388:     PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);
389:     PetscStrcasecmp("cuda",backendstr,&isCuda);
390:     PetscStrcasecmp("kokkos",backendstr,&isKokkos);
391:     PetscStrcasecmp("hip",backendstr,&isHip);
392:   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
393:     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
394:     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
395:     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
396:     else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
397:   #elif defined(PETSC_HAVE_KOKKOS)
398:     if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
399:    #endif
400:   }
401:  #endif
402:   if (sf->ops->SetFromOptions) {(*sf->ops->SetFromOptions)(PetscOptionsObject,sf);}
403:   PetscOptionsEnd();
404:   return(0);
405: }
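/*
   Editor's note: illustrative command lines for the options parsed above; the executable name
   "./app" is hypothetical.

     ./app -sf_type neighbor -sf_rank_order 0
     ./app -sf_backend kokkos -sf_use_stream_aware_mpi 1 -use_gpu_aware_mpi 1
*/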

407: /*@
408:    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

410:    Logically Collective

412:    Input Arguments:
413: +  sf - star forest
414: -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

416:    Level: advanced

418: .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
419: @*/
420: PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
421: {
425:   if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
426:   sf->rankorder = flg;
427:   return(0);
428: }

430: /*@
431:    PetscSFSetGraph - Set a parallel star forest

433:    Collective

435:    Input Arguments:
436: +  sf - star forest
 437: .  nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
438: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
439: .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
440: during setup in debug mode)
441: .  localmode - copy mode for ilocal
442: .  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
443: during setup in debug mode)
444: -  remotemode - copy mode for iremote

446:    Level: intermediate

448:    Notes:
449:     In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

 451:    Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
 452:    encode contiguous storage. In that case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
 453:    needed.

455:    Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
456:    indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

458: .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
459: @*/
460: PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
461: {

468:   if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
469:   if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

471:   if (sf->nroots >= 0) { /* Reset only if graph already set */
472:     PetscSFReset(sf);
473:   }

475:   PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);

477:   sf->nroots  = nroots;
478:   sf->nleaves = nleaves;

480:   if (nleaves && ilocal) {
481:     PetscInt i;
482:     PetscInt minleaf = PETSC_MAX_INT;
483:     PetscInt maxleaf = PETSC_MIN_INT;
484:     int      contiguous = 1;
485:     for (i=0; i<nleaves; i++) {
486:       minleaf = PetscMin(minleaf,ilocal[i]);
487:       maxleaf = PetscMax(maxleaf,ilocal[i]);
488:       contiguous &= (ilocal[i] == i);
489:     }
490:     sf->minleaf = minleaf;
491:     sf->maxleaf = maxleaf;
492:     if (contiguous) {
493:       if (localmode == PETSC_OWN_POINTER) {
494:         PetscFree(ilocal);
495:       }
496:       ilocal = NULL;
497:     }
498:   } else {
499:     sf->minleaf = 0;
500:     sf->maxleaf = nleaves - 1;
501:   }

503:   if (ilocal) {
504:     switch (localmode) {
505:     case PETSC_COPY_VALUES:
506:       PetscMalloc1(nleaves,&sf->mine_alloc);
507:       PetscArraycpy(sf->mine_alloc,ilocal,nleaves);
508:       sf->mine = sf->mine_alloc;
509:       break;
510:     case PETSC_OWN_POINTER:
511:       sf->mine_alloc = (PetscInt*)ilocal;
512:       sf->mine       = sf->mine_alloc;
513:       break;
514:     case PETSC_USE_POINTER:
515:       sf->mine_alloc = NULL;
516:       sf->mine       = (PetscInt*)ilocal;
517:       break;
518:     default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
519:     }
520:   }

522:   switch (remotemode) {
523:   case PETSC_COPY_VALUES:
524:     PetscMalloc1(nleaves,&sf->remote_alloc);
525:     PetscArraycpy(sf->remote_alloc,iremote,nleaves);
526:     sf->remote = sf->remote_alloc;
527:     break;
528:   case PETSC_OWN_POINTER:
529:     sf->remote_alloc = (PetscSFNode*)iremote;
530:     sf->remote       = sf->remote_alloc;
531:     break;
532:   case PETSC_USE_POINTER:
533:     sf->remote_alloc = NULL;
534:     sf->remote       = (PetscSFNode*)iremote;
535:     break;
536:   default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
537:   }

539:   PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);
540:   sf->graphset = PETSC_TRUE;
541:   return(0);
542: }
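/*
   Editor's sketch (not part of sf.c): a small, hedged example of PetscSFSetGraph(). Each rank owns
   two roots, and its two contiguously stored leaves reference the matching roots on the next rank.
   The helper name is hypothetical and the block is guarded out.
*/
#if 0
static PetscErrorCode ExampleSetRingGraph(PetscSF sf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscSFNode    iremote[2];
  PetscInt       i;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  for (i=0; i<2; i++) {
    iremote[i].rank  = (rank+1)%size;   /* rank owning the referenced root */
    iremote[i].index = i;               /* root index on that rank */
  }
  /* ilocal = NULL: leaves are stored contiguously in the leafdata buffer */
  ierr = PetscSFSetGraph(sf,2,2,NULL,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
  return 0;
}
#endif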

544: /*@
545:   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

547:   Collective

549:   Input Parameters:
550: + sf      - The PetscSF
551: . map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
552: - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

554:   Notes:
555:   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
556:   n and N are local and global sizes of x respectively.

 558:   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph such that, if one does a Bcast on it, x is copied to
 559:   sequential vectors y on all ranks.

 561:   With PETSCSF_PATTERN_GATHER, the routine creates a graph such that, if one does a Bcast on it, x is copied to a
 562:   sequential vector y on rank 0.

 564:   In the above cases, entries of x are roots and entries of y are leaves.

 566:   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is the size of sf's communicator. The routine
 567:   creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
 568:   of rank j, where 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount equal to 1. This does not
 569:   mean one cannot send multiple items; one just needs to create a new MPI datatype for the multiple data
 570:   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

572:   In this case, roots and leaves are symmetric.

574:   Level: intermediate
575:  @*/
576: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
577: {
578:   MPI_Comm       comm;
579:   PetscInt       n,N,res[2];
580:   PetscMPIInt    rank,size;
581:   PetscSFType    type;

585:   PetscObjectGetComm((PetscObject)sf, &comm);
586:   if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
587:   MPI_Comm_rank(comm,&rank);
588:   MPI_Comm_size(comm,&size);

590:   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
591:     type = PETSCSFALLTOALL;
592:     PetscLayoutCreate(comm,&sf->map);
593:     PetscLayoutSetLocalSize(sf->map,size);
594:     PetscLayoutSetSize(sf->map,((PetscInt)size)*size);
595:     PetscLayoutSetUp(sf->map);
596:   } else {
597:     PetscLayoutGetLocalSize(map,&n);
598:     PetscLayoutGetSize(map,&N);
599:     res[0] = n;
600:     res[1] = -n;
 601:     /* Check if n is the same over all ranks so that we can optimize it */
602:     MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);
603:     if (res[0] == -res[1]) { /* same n */
604:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
605:     } else {
606:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
607:     }
608:     PetscLayoutReference(map,&sf->map);
609:   }
610:   PetscSFSetType(sf,type);

612:   sf->pattern = pattern;
613:   sf->mine    = NULL; /* Contiguous */

615:   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
616:      Also set other easy stuff.
617:    */
618:   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
619:     sf->nleaves      = N;
620:     sf->nroots       = n;
621:     sf->nranks       = size;
622:     sf->minleaf      = 0;
623:     sf->maxleaf      = N - 1;
624:   } else if (pattern == PETSCSF_PATTERN_GATHER) {
625:     sf->nleaves      = rank ? 0 : N;
626:     sf->nroots       = n;
627:     sf->nranks       = rank ? 0 : size;
628:     sf->minleaf      = 0;
629:     sf->maxleaf      = rank ? -1 : N - 1;
630:   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
631:     sf->nleaves      = size;
632:     sf->nroots       = size;
633:     sf->nranks       = size;
634:     sf->minleaf      = 0;
635:     sf->maxleaf      = size - 1;
636:   }
637:   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
638:   sf->graphset = PETSC_TRUE;
639:   return(0);
640: }
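/*
   Editor's sketch (not part of sf.c): building an ALLGATHER-patterned SF from a layout, as the
   notes above describe. The helper name is hypothetical and the block is guarded out. Destroying
   the layout afterwards is fine because the SF keeps its own reference.
*/
#if 0
static PetscErrorCode ExampleAllgatherSF(MPI_Comm comm,PetscInt n)
{
  PetscErrorCode ierr;
  PetscSF        sf;
  PetscLayout    map;

  ierr = PetscLayoutCreate(comm,&map);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(map,n);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphWithPattern(sf,map,PETSCSF_PATTERN_ALLGATHER);CHKERRQ(ierr);
  /* A subsequent PetscSFBcastBegin/End() with MPI_REPLACE would replicate the n local roots
     into N = sum(n) leaves on every rank. */
  ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  return 0;
}
#endif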

642: /*@
643:    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

645:    Collective

647:    Input Arguments:

649: .  sf - star forest to invert

651:    Output Arguments:
652: .  isf - inverse of sf
653:    Level: advanced

655:    Notes:
656:    All roots must have degree 1.

658:    The local space may be a permutation, but cannot be sparse.

660: .seealso: PetscSFSetGraph()
661: @*/
662: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
663: {
665:   PetscMPIInt    rank;
666:   PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
667:   const PetscInt *ilocal;
668:   PetscSFNode    *roots,*leaves;

672:   PetscSFCheckGraphSet(sf,1);

675:   PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);
676:   maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

678:   MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
679:   PetscMalloc2(nroots,&roots,maxlocal,&leaves);
680:   for (i=0; i<maxlocal; i++) {
681:     leaves[i].rank  = rank;
682:     leaves[i].index = i;
683:   }
 684:   for (i=0; i<nroots; i++) {
685:     roots[i].rank  = -1;
686:     roots[i].index = -1;
687:   }
688:   PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);
689:   PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);

691:   /* Check whether our leaves are sparse */
692:   for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
693:   if (count == nroots) newilocal = NULL;
694:   else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
695:     PetscMalloc1(count,&newilocal);
696:     for (i=0,count=0; i<nroots; i++) {
697:       if (roots[i].rank >= 0) {
698:         newilocal[count]   = i;
699:         roots[count].rank  = roots[i].rank;
700:         roots[count].index = roots[i].index;
701:         count++;
702:       }
703:     }
704:   }

706:   PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);
707:   PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);
708:   PetscFree2(roots,leaves);
709:   return(0);
710: }
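/*
   Editor's sketch (not part of sf.c): inverting an SF whose roots all have degree 1; communication
   on the inverse moves data in the opposite direction to the original. The helper name is
   hypothetical and the block is guarded out.
*/
#if 0
static PetscErrorCode ExampleInvert(PetscSF sf)
{
  PetscErrorCode ierr;
  PetscSF        isf;

  ierr = PetscSFCreateInverseSF(sf,&isf);CHKERRQ(ierr);
  /* ... communicate on isf ... */
  ierr = PetscSFDestroy(&isf);CHKERRQ(ierr);
  return 0;
}
#endif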

712: /*@
713:    PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

715:    Collective

717:    Input Arguments:
718: +  sf - communication object to duplicate
719: -  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

721:    Output Arguments:
722: .  newsf - new communication object

724:    Level: beginner

726: .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
727: @*/
728: PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
729: {
730:   PetscSFType    type;
732:   MPI_Datatype   dtype=MPIU_SCALAR;

738:   PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);
739:   PetscSFGetType(sf,&type);
740:   if (type) {PetscSFSetType(*newsf,type);}
741:   if (opt == PETSCSF_DUPLICATE_GRAPH) {
742:     PetscSFCheckGraphSet(sf,1);
743:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
744:       PetscInt          nroots,nleaves;
745:       const PetscInt    *ilocal;
746:       const PetscSFNode *iremote;
747:       PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
748:       PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);
749:     } else {
750:       PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);
751:     }
752:   }
753:   /* Since oldtype is committed, so is newtype, according to MPI */
754:   if (sf->vscat.bs > 1) {MPI_Type_dup(sf->vscat.unit,&dtype);}
755:   (*newsf)->vscat.bs     = sf->vscat.bs;
756:   (*newsf)->vscat.unit   = dtype;
757:   (*newsf)->vscat.to_n   = sf->vscat.to_n;
758:   (*newsf)->vscat.from_n = sf->vscat.from_n;
759:   /* Do not copy lsf. Build it on demand since it is rarely used */

761: #if defined(PETSC_HAVE_DEVICE)
762:   (*newsf)->backend              = sf->backend;
 763:   (*newsf)->unknown_input_stream = sf->unknown_input_stream;
764:   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
765:   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
766: #endif
767:   if (sf->ops->Duplicate) {(*sf->ops->Duplicate)(sf,opt,*newsf);}
 768:   /* Don't do PetscSFSetUp() since the new sf's graph might not have been set. */
769:   return(0);
770: }

772: /*@C
773:    PetscSFGetGraph - Get the graph specifying a parallel star forest

775:    Not Collective

777:    Input Arguments:
778: .  sf - star forest

780:    Output Arguments:
 781: +  nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
782: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
783: .  ilocal - locations of leaves in leafdata buffers
784: -  iremote - remote locations of root vertices for each leaf on the current process

786:    Notes:
 787:    We do not currently require that the graph be set, thus nroots = -1 is returned if it has not been set yet.

 789:    When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
 790:    want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.

792:    Level: intermediate

794: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
795: @*/
796: PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
797: {

802:   if (sf->ops->GetGraph) {
803:     (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);
804:   } else {
805:     if (nroots) *nroots = sf->nroots;
806:     if (nleaves) *nleaves = sf->nleaves;
807:     if (ilocal) *ilocal = sf->mine;
808:     if (iremote) *iremote = sf->remote;
809:   }
810:   return(0);
811: }
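/*
   Editor's sketch (not part of sf.c): inspecting a graph returned by PetscSFGetGraph(). The helper
   name is hypothetical and the block is guarded out.
*/
#if 0
static PetscErrorCode ExampleInspectGraph(PetscSF sf)
{
  PetscErrorCode     ierr;
  PetscInt           nroots,nleaves,i;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
  for (i=0; i<nleaves; i++) {
    const PetscInt leaf = ilocal ? ilocal[i] : i;   /* NULL ilocal means contiguous leaves */
    ierr = PetscPrintf(PETSC_COMM_SELF,"leaf %D <- (rank %D, root %D)\n",leaf,iremote[i].rank,iremote[i].index);CHKERRQ(ierr);
  }
  return 0;
}
#endif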

813: /*@
814:    PetscSFGetLeafRange - Get the active leaf ranges

816:    Not Collective

818:    Input Arguments:
819: .  sf - star forest

821:    Output Arguments:
 822: +  minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
 823: -  maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.

825:    Level: developer

827: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
828: @*/
829: PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
830: {
833:   PetscSFCheckGraphSet(sf,1);
834:   if (minleaf) *minleaf = sf->minleaf;
835:   if (maxleaf) *maxleaf = sf->maxleaf;
836:   return(0);
837: }

839: /*@C
 840:    PetscSFViewFromOptions - View a star forest based on values in the options database

842:    Collective on PetscSF

844:    Input Parameters:
845: +  A - the star forest
846: .  obj - Optional object
847: -  name - command line option

849:    Level: intermediate
 850: .seealso: PetscSF, PetscSFView(), PetscObjectViewFromOptions(), PetscSFCreate()
851: @*/
852: PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
853: {

858:   PetscObjectViewFromOptions((PetscObject)A,obj,name);
859:   return(0);
860: }

862: /*@C
863:    PetscSFView - view a star forest

865:    Collective

867:    Input Arguments:
868: +  sf - star forest
869: -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

871:    Level: beginner

873: .seealso: PetscSFCreate(), PetscSFSetGraph()
874: @*/
875: PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
876: {
877:   PetscErrorCode    ierr;
878:   PetscBool         iascii;
879:   PetscViewerFormat format;

883:   if (!viewer) {PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);}
886:   if (sf->graphset) {PetscSFSetUp(sf);}
887:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
888:   if (iascii) {
889:     PetscMPIInt rank;
890:     PetscInt    ii,i,j;

892:     PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);
893:     PetscViewerASCIIPushTab(viewer);
894:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
895:       if (!sf->graphset) {
896:         PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");
897:         PetscViewerASCIIPopTab(viewer);
898:         return(0);
899:       }
900:       MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
901:       PetscViewerASCIIPushSynchronized(viewer);
902:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);
903:       for (i=0; i<sf->nleaves; i++) {
904:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);
905:       }
906:       PetscViewerFlush(viewer);
907:       PetscViewerGetFormat(viewer,&format);
908:       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
909:         PetscMPIInt *tmpranks,*perm;
910:         PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);
911:         PetscArraycpy(tmpranks,sf->ranks,sf->nranks);
912:         for (i=0; i<sf->nranks; i++) perm[i] = i;
913:         PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);
914:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);
915:         for (ii=0; ii<sf->nranks; ii++) {
916:           i = perm[ii];
917:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);
918:           for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
919:             PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);
920:           }
921:         }
922:         PetscFree2(tmpranks,perm);
923:       }
924:       PetscViewerFlush(viewer);
925:       PetscViewerASCIIPopSynchronized(viewer);
926:     }
927:     PetscViewerASCIIPopTab(viewer);
928:   }
929:   if (sf->ops->View) {(*sf->ops->View)(sf,viewer);}
930:   return(0);
931: }

933: /*@C
934:    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

936:    Not Collective

938:    Input Arguments:
939: .  sf - star forest

941:    Output Arguments:
942: +  nranks - number of ranks referenced by local part
943: .  ranks - array of ranks
944: .  roffset - offset in rmine/rremote for each rank (length nranks+1)
945: .  rmine - concatenated array holding local indices referencing each remote rank
946: -  rremote - concatenated array holding remote indices referenced for each remote rank

948:    Level: developer

950: .seealso: PetscSFGetLeafRanks()
951: @*/
952: PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
953: {

958:   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
959:   if (sf->ops->GetRootRanks) {
960:     (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);
961:   } else {
962:     /* The generic implementation */
963:     if (nranks)  *nranks  = sf->nranks;
964:     if (ranks)   *ranks   = sf->ranks;
965:     if (roffset) *roffset = sf->roffset;
966:     if (rmine)   *rmine   = sf->rmine;
967:     if (rremote) *rremote = sf->rremote;
968:   }
969:   return(0);
970: }
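/*
   Editor's sketch (not part of sf.c): walking the compressed per-rank representation returned by
   PetscSFGetRootRanks(); sf is assumed to have been set up already. The helper name is hypothetical
   and the block is guarded out.
*/
#if 0
static PetscErrorCode ExampleWalkRootRanks(PetscSF sf)
{
  PetscErrorCode     ierr;
  PetscInt           nranks,i,j;
  const PetscMPIInt *ranks;
  const PetscInt    *roffset,*rmine,*rremote;

  ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr);
  for (i=0; i<nranks; i++) {
    for (j=roffset[i]; j<roffset[i+1]; j++) {
      /* local leaf rmine[j] references root index rremote[j] owned by rank ranks[i] */
    }
  }
  return 0;
}
#endif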

972: /*@C
973:    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

975:    Not Collective

977:    Input Arguments:
978: .  sf - star forest

980:    Output Arguments:
981: +  niranks - number of leaf ranks referencing roots on this process
982: .  iranks - array of ranks
983: .  ioffset - offset in irootloc for each rank (length niranks+1)
984: -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

986:    Level: developer

988: .seealso: PetscSFGetRootRanks()
989: @*/
990: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
991: {

996:   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
997:   if (sf->ops->GetLeafRanks) {
998:     (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);
999:   } else {
1000:     PetscSFType type;
1001:     PetscSFGetType(sf,&type);
1002:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
1003:   }
1004:   return(0);
1005: }

1007: static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
1008:   PetscInt i;
1009:   for (i=0; i<n; i++) {
1010:     if (needle == list[i]) return PETSC_TRUE;
1011:   }
1012:   return PETSC_FALSE;
1013: }

1015: /*@C
1016:    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

1018:    Collective

1020:    Input Arguments:
1021: +  sf - PetscSF to set up; PetscSFSetGraph() must have been called
1022: -  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

1024:    Level: developer

1026: .seealso: PetscSFGetRootRanks()
1027: @*/
1028: PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
1029: {
1030:   PetscErrorCode     ierr;
1031:   PetscTable         table;
1032:   PetscTablePosition pos;
1033:   PetscMPIInt        size,groupsize,*groupranks;
1034:   PetscInt           *rcount,*ranks;
1035:   PetscInt           i, irank = -1,orank = -1;

1039:   PetscSFCheckGraphSet(sf,1);
1040:   MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
1041:   PetscTableCreate(10,size,&table);
1042:   for (i=0; i<sf->nleaves; i++) {
1043:     /* Log 1-based rank */
1044:     PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);
1045:   }
1046:   PetscTableGetCount(table,&sf->nranks);
1047:   PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);
1048:   PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);
1049:   PetscTableGetHeadPosition(table,&pos);
1050:   for (i=0; i<sf->nranks; i++) {
1051:     PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);
1052:     ranks[i]--;             /* Convert back to 0-based */
1053:   }
1054:   PetscTableDestroy(&table);

1056:   /* We expect that dgroup is reliably "small" while nranks could be large */
1057:   {
1058:     MPI_Group group = MPI_GROUP_NULL;
1059:     PetscMPIInt *dgroupranks;
1060:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1061:     MPI_Group_size(dgroup,&groupsize);
1062:     PetscMalloc1(groupsize,&dgroupranks);
1063:     PetscMalloc1(groupsize,&groupranks);
1064:     for (i=0; i<groupsize; i++) dgroupranks[i] = i;
1065:     if (groupsize) {MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);}
1066:     MPI_Group_free(&group);
1067:     PetscFree(dgroupranks);
1068:   }

1070:   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1071:   for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
1072:     for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
1073:       if (InList(ranks[i],groupsize,groupranks)) break;
1074:     }
1075:     for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1076:       if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
1077:     }
1078:     if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
1079:       PetscInt tmprank,tmpcount;

1081:       tmprank             = ranks[i];
1082:       tmpcount            = rcount[i];
1083:       ranks[i]            = ranks[sf->ndranks];
1084:       rcount[i]           = rcount[sf->ndranks];
1085:       ranks[sf->ndranks]  = tmprank;
1086:       rcount[sf->ndranks] = tmpcount;
1087:       sf->ndranks++;
1088:     }
1089:   }
1090:   PetscFree(groupranks);
1091:   PetscSortIntWithArray(sf->ndranks,ranks,rcount);
1092:   PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);
1093:   sf->roffset[0] = 0;
1094:   for (i=0; i<sf->nranks; i++) {
1095:     PetscMPIIntCast(ranks[i],sf->ranks+i);
1096:     sf->roffset[i+1] = sf->roffset[i] + rcount[i];
1097:     rcount[i]        = 0;
1098:   }
1099:   for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
1100:     /* short circuit */
1101:     if (orank != sf->remote[i].rank) {
1102:       /* Search for index of iremote[i].rank in sf->ranks */
1103:       PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);
1104:       if (irank < 0) {
1105:         PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);
1106:         if (irank >= 0) irank += sf->ndranks;
1107:       }
1108:       orank = sf->remote[i].rank;
1109:     }
1110:     if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
1111:     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
1112:     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1113:     rcount[irank]++;
1114:   }
1115:   PetscFree2(rcount,ranks);
1116:   return(0);
1117: }

1119: /*@C
1120:    PetscSFGetGroups - gets incoming and outgoing process groups

1122:    Collective

1124:    Input Argument:
1125: .  sf - star forest

1127:    Output Arguments:
1128: +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
1129: -  outgoing - group of destination processes for outgoing edges (roots that I reference)

1131:    Level: developer

1133: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
1134: @*/
1135: PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
1136: {
1138:   MPI_Group      group = MPI_GROUP_NULL;

1141:   if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
1142:   if (sf->ingroup == MPI_GROUP_NULL) {
1143:     PetscInt       i;
1144:     const PetscInt *indegree;
1145:     PetscMPIInt    rank,*outranks,*inranks;
1146:     PetscSFNode    *remote;
1147:     PetscSF        bgcount;

1149:     /* Compute the number of incoming ranks */
1150:     PetscMalloc1(sf->nranks,&remote);
1151:     for (i=0; i<sf->nranks; i++) {
1152:       remote[i].rank  = sf->ranks[i];
1153:       remote[i].index = 0;
1154:     }
1155:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);
1156:     PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1157:     PetscSFComputeDegreeBegin(bgcount,&indegree);
1158:     PetscSFComputeDegreeEnd(bgcount,&indegree);
1159:     /* Enumerate the incoming ranks */
1160:     PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);
1161:     MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1162:     for (i=0; i<sf->nranks; i++) outranks[i] = rank;
1163:     PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);
1164:     PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);
1165:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1166:     MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);
1167:     MPI_Group_free(&group);
1168:     PetscFree2(inranks,outranks);
1169:     PetscSFDestroy(&bgcount);
1170:   }
1171:   *incoming = sf->ingroup;

1173:   if (sf->outgroup == MPI_GROUP_NULL) {
1174:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1175:     MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);
1176:     MPI_Group_free(&group);
1177:   }
1178:   *outgoing = sf->outgroup;
1179:   return(0);
1180: }

1182: /*@
1183:    PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

1185:    Collective

1187:    Input Argument:
1188: .  sf - star forest that may contain roots with 0 or with more than 1 vertex

1190:    Output Arguments:
1191: .  multi - star forest with split roots, such that each root has degree exactly 1

1193:    Level: developer

1195:    Notes:

1197:    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1198:    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1199:    edge, it is a candidate for future optimization that might involve its removal.

1201: .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1202: @*/
1203: PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
1204: {

1210:   if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
1211:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1212:     *multi = sf->multi;
1213:     sf->multi->multi = sf->multi;
1214:     return(0);
1215:   }
1216:   if (!sf->multi) {
1217:     const PetscInt *indegree;
1218:     PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
1219:     PetscSFNode    *remote;
1220:     maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
1221:     PetscSFComputeDegreeBegin(sf,&indegree);
1222:     PetscSFComputeDegreeEnd(sf,&indegree);
1223:     PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);
1224:     inoffset[0] = 0;
1225:     for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
1226:     for (i=0; i<maxlocal; i++) outones[i] = 1;
1227:     PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1228:     PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1229:     for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1230:     if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
1231:       for (i=0; i<sf->nroots; i++) {
1232:         if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
1233:       }
1234:     }
1235:     PetscMalloc1(sf->nleaves,&remote);
1236:     for (i=0; i<sf->nleaves; i++) {
1237:       remote[i].rank  = sf->remote[i].rank;
1238:       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1239:     }
1240:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1241:     sf->multi->multi = sf->multi;
1242:     PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1243:     if (sf->rankorder) {        /* Sort the ranks */
1244:       PetscMPIInt rank;
1245:       PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
1246:       PetscSFNode *newremote;
1247:       MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1248:       for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
1249:       PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);
1250:       for (i=0; i<maxlocal; i++) outranks[i] = rank;
1251:       PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);
1252:       PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);
1253:       /* Sort the incoming ranks at each vertex, build the inverse map */
1254:       for (i=0; i<sf->nroots; i++) {
1255:         PetscInt j;
1256:         for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
1257:         PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);
1258:         for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1259:       }
1260:       PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);
1261:       PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);
1262:       PetscMalloc1(sf->nleaves,&newremote);
1263:       for (i=0; i<sf->nleaves; i++) {
1264:         newremote[i].rank  = sf->remote[i].rank;
1265:         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1266:       }
1267:       PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);
1268:       PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);
1269:     }
1270:     PetscFree3(inoffset,outones,outoffset);
1271:   }
1272:   *multi = sf->multi;
1273:   return(0);
1274: }

1276: /*@C
1277:    PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices

1279:    Collective

1281:    Input Arguments:
1282: +  sf - original star forest
1283: .  nselected  - number of selected roots on this process
1284: -  selected   - indices of the selected roots on this process

1286:    Output Arguments:
1287: .  esf - new star forest

1289:    Level: advanced

1291:    Note:
1292:    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1293:    be done by calling PetscSFGetGraph().

1295: .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1296: @*/
1297: PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1298: {
1299:   PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1300:   const PetscInt    *ilocal;
1301:   signed char       *rootdata,*leafdata,*leafmem;
1302:   const PetscSFNode *iremote;
1303:   PetscSFNode       *new_iremote;
1304:   MPI_Comm          comm;
1305:   PetscErrorCode    ierr;

1309:   PetscSFCheckGraphSet(sf,1);

1313:   PetscSFSetUp(sf);
1314:   PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);
1315:   PetscObjectGetComm((PetscObject)sf,&comm);
1316:   PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);

1318:   if (PetscDefined(USE_DEBUG)) {  /* Error out if selected[] has dups or out-of-range indices */
1319:     PetscBool dups;
1321:     if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1322:     for (i=0; i<nselected; i++)
1323:       if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root index %D is out of [0,%D)",selected[i],nroots);
1324:   }

1326:   if (sf->ops->CreateEmbeddedRootSF) {
1327:     (*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf);
1328:   } else {
1329:     /* A generic version of creating embedded sf */
1330:     PetscSFGetLeafRange(sf,&minleaf,&maxleaf);
1331:     maxlocal = maxleaf - minleaf + 1;
1332:     PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);
1333:     leafdata = leafmem - minleaf;
1334:     /* Tag selected roots and bcast to leaves */
1335:     for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1336:     PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);
1337:     PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);

1339:     /* Build esf with leaves that are still connected */
1340:     esf_nleaves = 0;
1341:     for (i=0; i<nleaves; i++) {
1342:       j = ilocal ? ilocal[i] : i;
1343:       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1344:          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1345:       */
1346:       esf_nleaves += (leafdata[j] ? 1 : 0);
1347:     }
1348:     PetscMalloc1(esf_nleaves,&new_ilocal);
1349:     PetscMalloc1(esf_nleaves,&new_iremote);
1350:     for (i=n=0; i<nleaves; i++) {
1351:       j = ilocal ? ilocal[i] : i;
1352:       if (leafdata[j]) {
1353:         new_ilocal[n]        = j;
1354:         new_iremote[n].rank  = iremote[i].rank;
1355:         new_iremote[n].index = iremote[i].index;
1356:         ++n;
1357:       }
1358:     }
1359:     PetscSFCreate(comm,esf);
1360:     PetscSFSetFromOptions(*esf);
1361:     PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1362:     PetscFree2(rootdata,leafmem);
1363:   }
1364:   PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);
1365:   return(0);
1366: }
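/*
   Editor's sketch (not part of sf.c): embedding an SF onto a subset of its roots; the root indices
   below are arbitrary illustrations and the helper name is hypothetical. The block is guarded out.
*/
#if 0
static PetscErrorCode ExampleEmbedRoots(PetscSF sf)
{
  PetscErrorCode ierr;
  PetscSF        esf;
  const PetscInt selected[] = {0,3};

  ierr = PetscSFCreateEmbeddedRootSF(sf,2,selected,&esf);CHKERRQ(ierr);
  /* esf keeps the original root/leaf numbering; use PetscSFGetGraph(esf,...) to see which leaves remain */
  ierr = PetscSFDestroy(&esf);CHKERRQ(ierr);
  return 0;
}
#endif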

1368: /*@C
1369:   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices

1371:   Collective

1373:   Input Arguments:
1374: + sf - original star forest
1375: . nselected  - number of selected leaves on this process
1376: - selected   - indices of the selected leaves on this process

1378:   Output Arguments:
1379: .  newsf - new star forest

1381:   Level: advanced

1383: .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph()
1384: @*/
1385: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1386: {
1387:   const PetscSFNode *iremote;
1388:   PetscSFNode       *new_iremote;
1389:   const PetscInt    *ilocal;
1390:   PetscInt          i,nroots,*leaves,*new_ilocal;
1391:   MPI_Comm          comm;
1392:   PetscErrorCode    ierr;

1396:   PetscSFCheckGraphSet(sf,1);

1400:   /* Remove duplicates from selected[] and put the results in leaves[] */
1401:   PetscObjectGetComm((PetscObject)sf,&comm);
1402:   PetscMalloc1(nselected,&leaves);
1403:   PetscArraycpy(leaves,selected,nselected);
1404:   PetscSortedRemoveDupsInt(&nselected,leaves);
1405:   if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);

1407:   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1408:   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1409:     (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);
1410:   } else {
1411:     PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);
1412:     PetscMalloc1(nselected,&new_ilocal);
1413:     PetscMalloc1(nselected,&new_iremote);
1414:     for (i=0; i<nselected; ++i) {
1415:       const PetscInt l     = leaves[i];
1416:       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1417:       new_iremote[i].rank  = iremote[l].rank;
1418:       new_iremote[i].index = iremote[l].index;
1419:     }
1420:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);
1421:     PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1422:   }
1423:   PetscFree(leaves);
1424:   return(0);
1425: }

1427: /*@C
1428:    PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()

1430:    Collective on PetscSF

1432:    Input Arguments:
1433: +  sf - star forest on which to communicate
1434: .  unit - data type associated with each node
1435: .  rootdata - buffer to broadcast
1436: -  op - operation to use for reduction

1438:    Output Arguments:
1439: .  leafdata - buffer to be reduced with values from each leaf's respective root

1441:    Level: intermediate

1443:    Notes:
1444:     When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1445:     are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1446:     use PetscSFBcastWithMemTypeBegin() instead.
1447: .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin()
1448: @*/
1449: PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1450: {
1452:   PetscMemType   rootmtype,leafmtype;

1456:   PetscSFSetUp(sf);
1457:   if (!sf->vscat.logging) {PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);}
1458:   PetscGetMemType(rootdata,&rootmtype);
1459:   PetscGetMemType(leafdata,&leafmtype);
1460:   (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1461:   if (!sf->vscat.logging) {PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);}
1462:   return(0);
1463: }
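/*
   Editor's sketch (not part of sf.c): a Bcast begin/end pair; with MPI_REPLACE each root value is
   simply copied to all of its leaves. The helper name is hypothetical and the block is guarded out.
*/
#if 0
static PetscErrorCode ExampleBcast(PetscSF sf,const PetscInt *rootdata,PetscInt *leafdata)
{
  PetscErrorCode ierr;

  ierr = PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  /* ... unrelated local work can overlap with the communication here ... */
  ierr = PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  return 0;
}
#endif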

1465: /*@C
1466:    PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()

1468:    Collective on PetscSF

1470:    Input Arguments:
1471: +  sf - star forest on which to communicate
1472: .  unit - data type associated with each node
1473: .  rootmtype - memory type of rootdata
1474: .  rootdata - buffer to broadcast
1475: .  leafmtype - memory type of leafdata
1476: -  op - operation to use for reduction

1478:    Output Arguments:
1479: .  leafdata - buffer to be reduced with values from each leaf's respective root

1481:    Level: intermediate

1483: .seealso: PetscSFBcastEnd(), PetscSFBcastBegin()
1484: @*/
1485: PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
1486: {

1491:   PetscSFSetUp(sf);
1492:   if (!sf->vscat.logging) {PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);}
1493:   (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1494:   if (!sf->vscat.logging) {PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);}
1495:   return(0);
1496: }

1498: /*@C
1499:    PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()

1501:    Collective

1503:    Input Arguments:
1504: +  sf - star forest
1505: .  unit - data type
1506: .  rootdata - buffer to broadcast
1507: -  op - operation to use for reduction

1509:    Output Arguments:
1510: .  leafdata - buffer to be reduced with values from each leaf's respective root

1512:    Level: intermediate

1514: .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1515: @*/
1516: PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1517: {

1522:   if (!sf->vscat.logging) {PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);}
1523:   (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op);
1524:   if (!sf->vscat.logging) {PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);}
1525:   return(0);
1526: }

1528: /*@C
1529:    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with a call to PetscSFReduceEnd()

1531:    Collective

1533:    Input Arguments:
1534: +  sf - star forest
1535: .  unit - data type
1536: .  leafdata - values to reduce
1537: -  op - reduction operation

1539:    Output Arguments:
1540: .  rootdata - result of reduction of values from all leaves of each root

1542:    Level: intermediate

1544:    Notes:
1545:     When PETSc is configured with device support, it uses its own mechanism to determine whether the given data pointers
1546:     are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory types,
1547:     use PetscSFReduceWithMemTypeBegin() instead.

1549: .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
1550: @*/
1551: PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1552: {
1554:   PetscMemType   rootmtype,leafmtype;

1558:   PetscSFSetUp(sf);
1559:   if (!sf->vscat.logging) {PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);}
1560:   PetscGetMemType(rootdata,&rootmtype);
1561:   PetscGetMemType(leafdata,&leafmtype);
1562:   (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1563:   if (!sf->vscat.logging) {PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);}
1564:   return(0);
1565: }
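
A sketch of the reverse traffic, under the same assumptions as the broadcast sketch above (graph set, contiguous leaf storage): each leaf contributes a value and the contributions are summed into the owning root.

  PetscInt i,nroots,nleaves,*rootdata,*leafdata;
  PetscSFGetGraph(sf,&nroots,&nleaves,NULL,NULL);
  PetscMalloc2(nroots,&rootdata,nleaves,&leafdata);
  for (i=0; i<nroots; i++)  rootdata[i] = 0;
  for (i=0; i<nleaves; i++) leafdata[i] = 1;   /* each leaf contributes 1 */
  /* after the pair completes, rootdata[r] holds the number of leaves of root r */
  PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);
  PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);
  PetscFree2(rootdata,leafdata);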

1567: /*@C
1568:    PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with a call to PetscSFReduceEnd()

1570:    Collective

1572:    Input Arguments:
1573: +  sf - star forest
1574: .  unit - data type
1575: .  leafmtype - memory type of leafdata
1576: .  leafdata - values to reduce
1577: .  rootmtype - memory type of rootdata
1578: -  op - reduction operation

1580:    Output Arguments:
1581: .  rootdata - result of reduction of values from all leaves of each root

1583:    Level: intermediate

1585: .seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
1586: @*/
1587: PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1588: {

1593:   PetscSFSetUp(sf);
1594:   if (!sf->vscat.logging) {PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);}
1595:   (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1596:   if (!sf->vscat.logging) {PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);}
1597:   return(0);
1598: }

1600: /*@C
1601:    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

1603:    Collective

1605:    Input Arguments:
1606: +  sf - star forest
1607: .  unit - data type
1608: .  leafdata - values to reduce
1609: -  op - reduction operation

1611:    Output Arguments:
1612: .  rootdata - result of reduction of values from all leaves of each root

1614:    Level: intermediate

1616: .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1617: @*/
1618: PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1619: {

1624:   if (!sf->vscat.logging) {PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);}
1625:   (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);
1626:   if (!sf->vscat.logging) {PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);}
1627:   return(0);
1628: }

1630: /*@C
1631:    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

1633:    Collective

1635:    Input Arguments:
1636: +  sf - star forest
1637: .  unit - data type
1638: .  leafdata - leaf values to use in reduction
1639: -  op - operation to use for reduction

1641:    Output Arguments:
1642: +  rootdata - root values to be updated, input state is seen by first process to perform an update
1643: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1645:    Level: advanced

1647:    Note:
1648:    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1649:    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1650:    not guaranteed across the whole vertex. Therefore, this function is mostly used with primitive types such as
1651:    integers.

1653: .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1654: @*/
1655: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1656: {
1658:   PetscMemType   rootmtype,leafmtype,leafupdatemtype;

1662:   PetscSFSetUp(sf);
1663:   PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1664:   PetscGetMemType(rootdata,&rootmtype);
1665:   PetscGetMemType(leafdata,&leafmtype);
1666:   PetscGetMemType(leafupdate,&leafupdatemtype);
1667:   if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1668:   (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);
1669:   PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1670:   return(0);
1671: }

1673: /*@C
1674:    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

1676:    Collective

1678:    Input Arguments:
1679: +  sf - star forest
1680: .  unit - data type
1681: .  leafdata - leaf values to use in reduction
1682: -  op - operation to use for reduction

1684:    Output Arguments:
1685: +  rootdata - root values to be updated, input state is seen by first process to perform an update
1686: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1688:    Level: advanced

1690: .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1691: @*/
1692: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1693: {

1698:   PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1699:   (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);
1700:   PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1701:   return(0);
1702: }
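
A common use of fetch-and-op is handing each leaf a slot index that is unique among the leaves of its root, e.g. before packing contributions; a sketch with integer counters, assuming contiguous leaf storage:

  PetscInt i,nroots,nleaves,*rootcount,*ones,*offset;
  PetscSFGetGraph(sf,&nroots,&nleaves,NULL,NULL);
  PetscMalloc3(nroots,&rootcount,nleaves,&ones,nleaves,&offset);
  for (i=0; i<nroots; i++)  rootcount[i] = 0;
  for (i=0; i<nleaves; i++) ones[i] = 1;
  /* each leaf atomically adds 1 to its root's counter and receives the counter's
     previous value in offset[i], i.e. a slot index unique among that root's leaves */
  PetscSFFetchAndOpBegin(sf,MPIU_INT,rootcount,ones,offset,MPI_SUM);
  PetscSFFetchAndOpEnd(sf,MPIU_INT,rootcount,ones,offset,MPI_SUM);
  PetscFree3(rootcount,ones,offset);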

1704: /*@C
1705:    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

1707:    Collective

1709:    Input Arguments:
1710: .  sf - star forest

1712:    Output Arguments:
1713: .  degree - degree of each root vertex

1715:    Level: advanced

1717:    Notes:
1718:    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

1720: .seealso: PetscSFGatherBegin()
1721: @*/
1722: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1723: {

1728:   PetscSFCheckGraphSet(sf,1);
1730:   if (!sf->degreeknown) {
1731:     PetscInt i, nroots = sf->nroots, maxlocal;
1732:     if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1733:     maxlocal = sf->maxleaf-sf->minleaf+1;
1734:     PetscMalloc1(nroots,&sf->degree);
1735:     PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1736:     for (i=0; i<nroots; i++) sf->degree[i] = 0;
1737:     for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1738:     PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1739:   }
1740:   *degree = NULL;
1741:   return(0);
1742: }

1744: /*@C
1745:    PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

1747:    Collective

1749:    Input Arguments:
1750: .  sf - star forest

1752:    Output Arguments:
1753: .  degree - degree of each root vertex

1755:    Level: developer

1757:    Notes:
1758:    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

1760: .seealso: PetscSFComputeDegreeBegin()
1761: @*/
1762: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
1763: {

1768:   PetscSFCheckGraphSet(sf,1);
1770:   if (!sf->degreeknown) {
1771:     if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1772:     PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1773:     PetscFree(sf->degreetmp);
1774:     sf->degreeknown = PETSC_TRUE;
1775:   }
1776:   *degree = sf->degree;
1777:   return(0);
1778: }
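
A sketch of the Begin/End pairing: the degree array is only valid after the End call and remains owned by the SF.

  const PetscInt *degree;
  PetscInt       i,nroots;
  PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
  PetscSFComputeDegreeBegin(sf,&degree);
  /* unrelated local work can be overlapped with the communication here */
  PetscSFComputeDegreeEnd(sf,&degree);
  for (i=0; i<nroots; i++) {
    /* degree[i] is the total number of leaves (over all processes) attached to root i */
  }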


1781: /*@C
1782:    PetscSFComputeMultiRootOriginalNumbering - Returns the original numbering of multi-roots (roots of the multi-SF returned by PetscSFGetMultiSF()).
1783:    Each multi-root is assigned the index of the corresponding original root.

1785:    Collective

1787:    Input Arguments:
1788: +  sf - star forest
1789: -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

1791:    Output Arguments:
1792: +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1793: -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

1795:    Level: developer

1797:    Notes:
1798:    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

1800: .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1801: @*/
1802: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1803: {
1804:   PetscSF             msf;
1805:   PetscInt            i, j, k, nroots, nmroots;
1806:   PetscErrorCode      ierr;

1810:   PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
1814:   PetscSFGetMultiSF(sf,&msf);
1815:   PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);
1816:   PetscMalloc1(nmroots, multiRootsOrigNumbering);
1817:   for (i=0,j=0,k=0; i<nroots; i++) {
1818:     if (!degree[i]) continue;
1819:     for (j=0; j<degree[i]; j++,k++) {
1820:       (*multiRootsOrigNumbering)[k] = i;
1821:     }
1822:   }
1823:   if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
1824:   if (nMultiRoots) *nMultiRoots = nmroots;
1825:   return(0);
1826: }
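
A sketch of how this routine is typically chained after the degree computation; unlike the degree array, the returned numbering is owned by the caller:

  const PetscInt *degree;
  PetscInt       nmulti,*origRoot;
  PetscSFComputeDegreeBegin(sf,&degree);
  PetscSFComputeDegreeEnd(sf,&degree);
  PetscSFComputeMultiRootOriginalNumbering(sf,degree,&nmulti,&origRoot);
  /* origRoot[k] is the index of the original root that multi-root k was split from */
  PetscFree(origRoot);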

1828: /*@C
1829:    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

1831:    Collective

1833:    Input Arguments:
1834: +  sf - star forest
1835: .  unit - data type
1836: -  leafdata - leaf data to gather to roots

1838:    Output Argument:
1839: .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

1841:    Level: intermediate

1843: .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1844: @*/
1845: PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1846: {
1848:   PetscSF        multi = NULL;

1852:   PetscSFSetUp(sf);
1853:   PetscSFGetMultiSF(sf,&multi);
1854:   PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);
1855:   return(0);
1856: }

1858: /*@C
1859:    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

1861:    Collective

1863:    Input Arguments:
1864: +  sf - star forest
1865: .  unit - data type
1866: -  leafdata - leaf data to gather to roots

1868:    Output Argument:
1869: .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

1871:    Level: intermediate

1873: .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1874: @*/
1875: PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1876: {
1878:   PetscSF        multi = NULL;

1882:   PetscSFGetMultiSF(sf,&multi);
1883:   PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);
1884:   return(0);
1885: }
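
A gather sketch, assuming contiguous leaf storage: the multi-root buffer needs one unit per incoming leaf, so its length is the sum of the root degrees (equivalently, the number of multi-roots).

  const PetscInt *degree;
  PetscInt       i,nroots,nleaves,nmulti = 0;
  PetscScalar    *leafdata,*multirootdata;
  PetscSFGetGraph(sf,&nroots,&nleaves,NULL,NULL);
  PetscSFComputeDegreeBegin(sf,&degree);
  PetscSFComputeDegreeEnd(sf,&degree);
  for (i=0; i<nroots; i++) nmulti += degree[i];
  PetscMalloc2(nleaves,&leafdata,nmulti,&multirootdata);
  for (i=0; i<nleaves; i++) leafdata[i] = (PetscScalar)i;
  /* every leaf value lands in its own slot of multirootdata, grouped by owning root */
  PetscSFGatherBegin(sf,MPIU_SCALAR,leafdata,multirootdata);
  PetscSFGatherEnd(sf,MPIU_SCALAR,leafdata,multirootdata);
  PetscFree2(leafdata,multirootdata);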

1887: /*@C
1888:    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

1890:    Collective

1892:    Input Arguments:
1893: +  sf - star forest
1894: .  unit - data type
1895: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1897:    Output Argument:
1898: .  leafdata - leaf data to be updated with personal data from each respective root

1900:    Level: intermediate

1902: .seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin(), PetscSFScatterEnd()
1903: @*/
1904: PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1905: {
1907:   PetscSF        multi = NULL;

1911:   PetscSFSetUp(sf);
1912:   PetscSFGetMultiSF(sf,&multi);
1913:   PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE);
1914:   return(0);
1915: }

1917: /*@C
1918:    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

1920:    Collective

1922:    Input Arguments:
1923: +  sf - star forest
1924: .  unit - data type
1925: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1927:    Output Argument:
1928: .  leafdata - leaf data to be updated with personal data from each respective root

1930:    Level: intermediate

1932: .seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd(), PetscSFScatterBegin()
1933: @*/
1934: PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1935: {
1937:   PetscSF        multi = NULL;

1941:   PetscSFGetMultiSF(sf,&multi);
1942:   PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE);
1943:   return(0);
1944: }
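
The scatter is the reverse of the gather above: each root sends a personal value to every one of its leaves out of its slots in the multi-root buffer. With multirootdata and leafdata sized as in the gather sketch:

  PetscSFScatterBegin(sf,MPIU_SCALAR,multirootdata,leafdata);
  PetscSFScatterEnd(sf,MPIU_SCALAR,multirootdata,leafdata);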

1946: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1947: {
1948:   PetscInt        i, n, nleaves;
1949:   const PetscInt *ilocal = NULL;
1950:   PetscHSetI      seen;
1951:   PetscErrorCode  ierr;

1954:   if (PetscDefined(USE_DEBUG)) {
1955:     PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);
1956:     PetscHSetICreate(&seen);
1957:     for (i = 0; i < nleaves; i++) {
1958:       const PetscInt leaf = ilocal ? ilocal[i] : i;
1959:       PetscHSetIAdd(seen,leaf);
1960:     }
1961:     PetscHSetIGetSize(seen,&n);
1962:     if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1963:     PetscHSetIDestroy(&seen);
1964:   }
1965:   return(0);
1966: }

1968: /*@
1969:   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

1971:   Input Parameters:
1972: + sfA - The first PetscSF
1973: - sfB - The second PetscSF

1975:   Output Parameters:
1976: . sfBA - The composite SF

1978:   Level: developer

1980:   Notes:
1981:   Currently, the two SFs must be defined on congruent communicators and they must be true star
1982:   forests, i.e. the same leaf is not connected with different roots.

1984:   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
1985:   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
1986:   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1987:   Bcast on sfA, then a Bcast on sfB, on connected nodes.

1989: .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
1990: @*/
1991: PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1992: {
1993:   PetscErrorCode    ierr;
1994:   const PetscSFNode *remotePointsA,*remotePointsB;
1995:   PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
1996:   const PetscInt    *localPointsA,*localPointsB;
1997:   PetscInt          *localPointsBA;
1998:   PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
1999:   PetscBool         denseB;

2003:   PetscSFCheckGraphSet(sfA,1);
2005:   PetscSFCheckGraphSet(sfB,2);
2008:   PetscSFCheckLeavesUnique_Private(sfA);
2009:   PetscSFCheckLeavesUnique_Private(sfB);

2011:   PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);
2012:   PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);
2013:   if (localPointsA) {
2014:     PetscMalloc1(numRootsB,&reorderedRemotePointsA);
2015:     for (i=0; i<numRootsB; i++) {
2016:       reorderedRemotePointsA[i].rank = -1;
2017:       reorderedRemotePointsA[i].index = -1;
2018:     }
2019:     for (i=0; i<numLeavesA; i++) {
2020:       if (localPointsA[i] >= numRootsB) continue;
2021:       reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
2022:     }
2023:     remotePointsA = reorderedRemotePointsA;
2024:   }
2025:   PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);
2026:   PetscMalloc1(maxleaf-minleaf+1,&leafdataB);
2027:   PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);
2028:   PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);
2029:   PetscFree(reorderedRemotePointsA);

2031:   denseB = (PetscBool)!localPointsB;
2032:   for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
2033:     if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
2034:     else numLeavesBA++;
2035:   }
2036:   if (denseB) {
2037:     localPointsBA  = NULL;
2038:     remotePointsBA = leafdataB;
2039:   } else {
2040:     PetscMalloc1(numLeavesBA,&localPointsBA);
2041:     PetscMalloc1(numLeavesBA,&remotePointsBA);
2042:     for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
2043:       const PetscInt l = localPointsB ? localPointsB[i] : i;

2045:       if (leafdataB[l-minleaf].rank == -1) continue;
2046:       remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
2047:       localPointsBA[numLeavesBA] = l;
2048:       numLeavesBA++;
2049:     }
2050:     PetscFree(leafdataB);
2051:   }
2052:   PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
2053:   PetscSFSetFromOptions(*sfBA);
2054:   PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
2055:   return(0);
2056: }
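
A composition sketch, assuming sfA's leaf space serves as sfB's root space (for example, sfA maps owned points to ghosted points and sfB maps those to a further distribution); the buffer names are illustrative, with rootdataA sized by sfA's roots and leafdataB by sfB's leaves:

  PetscSF sfBA;
  PetscSFCompose(sfA,sfB,&sfBA);
  /* on connected nodes, one Bcast on sfBA has the same effect as a Bcast on sfA
     followed by a Bcast on sfB */
  PetscSFBcastBegin(sfBA,MPIU_INT,rootdataA,leafdataB,MPI_REPLACE);
  PetscSFBcastEnd(sfBA,MPIU_INT,rootdataA,leafdataB,MPI_REPLACE);
  PetscSFDestroy(&sfBA);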

2058: /*@
2059:   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

2061:   Input Parameters:
2062: + sfA - The first PetscSF
2063: - sfB - The second PetscSF

2065:   Output Parameters:
2066: . sfBA - The composite SF.

2068:   Level: developer

2070:   Notes:
2071:   Currently, the two SFs must be defined on congruent communicators and they must be true star
2072:   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
2073:    second SF must have degree 1, i.e., no root has more than one leaf connected to it.

2075:   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
2076:   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
2077:   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
2078:   a Reduce on sfB, on connected roots.

2080: .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
2081: @*/
2082: PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
2083: {
2084:   PetscErrorCode    ierr;
2085:   const PetscSFNode *remotePointsA,*remotePointsB;
2086:   PetscSFNode       *remotePointsBA;
2087:   const PetscInt    *localPointsA,*localPointsB;
2088:   PetscSFNode       *reorderedRemotePointsA = NULL;
2089:   PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
2090:   MPI_Op            op;
2091: #if defined(PETSC_USE_64BIT_INDICES)
2092:   PetscBool         iswin;
2093: #endif

2097:   PetscSFCheckGraphSet(sfA,1);
2099:   PetscSFCheckGraphSet(sfB,2);
2102:   PetscSFCheckLeavesUnique_Private(sfA);
2103:   PetscSFCheckLeavesUnique_Private(sfB);

2105:   PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);
2106:   PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);

2108:   /* TODO: Check roots of sfB have degree of 1 */
2109:   /* Once that check is implemented, we can replace MPI_MAXLOC with MPI_REPLACE,
2110:      since the two have the same effect when every root of sfB has degree 1.
2111:      We use MPI_MAXLOC only to get a deterministic output from this routine if
2112:      the root condition is not met.
2113:    */
2114:   op = MPI_MAXLOC;
2115: #if defined(PETSC_USE_64BIT_INDICES)
2116:   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2117:   PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);
2118:   if (iswin) op = MPI_REPLACE;
2119: #endif

2121:   PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);
2122:   PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);
2123:   for (i=0; i<maxleaf - minleaf + 1; i++) {
2124:     reorderedRemotePointsA[i].rank = -1;
2125:     reorderedRemotePointsA[i].index = -1;
2126:   }
2127:   if (localPointsA) {
2128:     for (i=0; i<numLeavesA; i++) {
2129:       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2130:       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2131:     }
2132:   } else {
2133:     for (i=0; i<numLeavesA; i++) {
2134:       if (i > maxleaf || i < minleaf) continue;
2135:       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2136:     }
2137:   }

2139:   PetscMalloc1(numRootsB,&localPointsBA);
2140:   PetscMalloc1(numRootsB,&remotePointsBA);
2141:   for (i=0; i<numRootsB; i++) {
2142:     remotePointsBA[i].rank = -1;
2143:     remotePointsBA[i].index = -1;
2144:   }

2146:   PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
2147:   PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
2148:   PetscFree(reorderedRemotePointsA);
2149:   for (i=0,numLeavesBA=0; i<numRootsB; i++) {
2150:     if (remotePointsBA[i].rank == -1) continue;
2151:     remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
2152:     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2153:     localPointsBA[numLeavesBA] = i;
2154:     numLeavesBA++;
2155:   }
2156:   PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
2157:   PetscSFSetFromOptions(*sfBA);
2158:   PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
2159:   return(0);
2160: }
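
An inverse-composition sketch, assuming sfA and sfB share the same leaf space (for example, two different distributions of the same local points); the result maps sfA's roots to sfB's roots wherever a shared leaf connects them:

  PetscSF sfBA;
  /* sfA: rootsA -> shared leaves, sfB: rootsB -> the same shared leaves,
     sfBA: rootsA -> rootsB on the connected subset */
  PetscSFComposeInverse(sfA,sfB,&sfBA);
  PetscSFDestroy(&sfBA);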

2162: /*
2163:   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

2165:   Input Parameters:
2166: . sf - The global PetscSF

2168:   Output Parameters:
2169: . out - The local PetscSF
2170:  */
2171: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
2172: {
2173:   MPI_Comm           comm;
2174:   PetscMPIInt        myrank;
2175:   const PetscInt     *ilocal;
2176:   const PetscSFNode  *iremote;
2177:   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
2178:   PetscSFNode        *liremote;
2179:   PetscSF            lsf;
2180:   PetscErrorCode     ierr;

2184:   if (sf->ops->CreateLocalSF) {
2185:     (*sf->ops->CreateLocalSF)(sf,out);
2186:   } else {
2187:     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2188:     PetscObjectGetComm((PetscObject)sf,&comm);
2189:     MPI_Comm_rank(comm,&myrank);

2191:     /* Find out local edges and build a local SF */
2192:     PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
2193:     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2194:     PetscMalloc1(lnleaves,&lilocal);
2195:     PetscMalloc1(lnleaves,&liremote);

2197:     for (i=j=0; i<nleaves; i++) {
2198:       if (iremote[i].rank == (PetscInt)myrank) {
2199:         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2200:         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
2201:         liremote[j].index = iremote[i].index;
2202:         j++;
2203:       }
2204:     }
2205:     PetscSFCreate(PETSC_COMM_SELF,&lsf);
2206:     PetscSFSetFromOptions(lsf);
2207:     PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);
2208:     PetscSFSetUp(lsf);
2209:     *out = lsf;
2210:   }
2211:   return(0);
2212: }

2214: /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2215: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2216: {
2217:   PetscErrorCode     ierr;
2218:   PetscMemType       rootmtype,leafmtype;

2222:   PetscSFSetUp(sf);
2223:   PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);
2224:   PetscGetMemType(rootdata,&rootmtype);
2225:   PetscGetMemType(leafdata,&leafmtype);
2226:   if (sf->ops->BcastToZero) {
2227:     (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);
2228:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2229:   PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);
2230:   return(0);
2231: }