Actual source code: sf.c
1: #include <petsc/private/sfimpl.h>
2: #include <petsc/private/hashseti.h>
3: #include <petscctable.h>
5: #if defined(PETSC_HAVE_CUDA)
6: #include <cuda_runtime.h>
7: #endif
9: #if defined(PETSC_HAVE_HIP)
10: #include <hip/hip_runtime.h>
11: #endif
13: #if defined(PETSC_USE_DEBUG)
14: # define PetscSFCheckGraphSet(sf,arg) do { \
15: if (PetscUnlikely(!(sf)->graphset)) \
16: SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
17: } while (0)
18: #else
19: # define PetscSFCheckGraphSet(sf,arg) do {} while (0)
20: #endif
22: const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
24: PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type)
25: {
28: *type = PETSC_MEMTYPE_HOST;
29: #if defined(PETSC_HAVE_CUDA)
30: if (PetscCUDAInitialized && data) {
31: cudaError_t cerr;
32: struct cudaPointerAttributes attr;
33: enum cudaMemoryType mtype;
34: cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */
35: cudaGetLastError(); /* Reset the last error */
36: #if (CUDART_VERSION < 10000)
37: mtype = attr.memoryType;
38: #else
39: mtype = attr.type;
40: #endif
41: if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
42: }
43: #endif
45: #if defined(PETSC_HAVE_HIP)
46: if (PetscHIPInitialized && data) {
47: hipError_t cerr;
48: struct hipPointerAttribute_t attr;
49: enum hipMemoryType mtype;
50: cerr = hipPointerGetAttributes(&attr,data);
51: hipGetLastError(); /* Reset the last error */
52: mtype = attr.memoryType;
53: if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
54: }
55: #endif
56: return(0);
57: }
59: /*@
60: PetscSFCreate - create a star forest communication context
62: Collective
64: Input Arguments:
65: . comm - communicator on which the star forest will operate
67: Output Arguments:
68: . sf - new star forest context
70: Options Database Keys:
71: + -sf_type basic - Use MPI persistent Isend/Irecv for communication (Default)
72: . -sf_type window - Use MPI-3 one-sided window for communication
73: - -sf_type neighbor - Use MPI-3 neighborhood collectives for communication
75: Level: intermediate
77: Notes:
78: When one knows the communication graph is one of the predefined patterns, such as MPI_Alltoall, MPI_Allgatherv, or
79: MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
80: SFs are optimized and have better performance than general SFs.
82: .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
83: @*/
84: PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
85: {
87: PetscSF b;
91: PetscSFInitializePackage();
93: PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);
95: b->nroots = -1;
96: b->nleaves = -1;
97: b->minleaf = PETSC_MAX_INT;
98: b->maxleaf = PETSC_MIN_INT;
99: b->nranks = -1;
100: b->rankorder = PETSC_TRUE;
101: b->ingroup = MPI_GROUP_NULL;
102: b->outgroup = MPI_GROUP_NULL;
103: b->graphset = PETSC_FALSE;
104: #if defined(PETSC_HAVE_DEVICE)
105: b->use_gpu_aware_mpi = use_gpu_aware_mpi;
106: b->use_stream_aware_mpi = PETSC_FALSE;
107: b->unknown_input_stream= PETSC_FALSE;
108: #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
109: b->backend = PETSCSF_BACKEND_KOKKOS;
110: #elif defined(PETSC_HAVE_CUDA)
111: b->backend = PETSCSF_BACKEND_CUDA;
112: #elif defined(PETSC_HAVE_HIP)
113: b->backend = PETSCSF_BACKEND_HIP;
114: #endif
116: #if defined(PETSC_HAVE_NVSHMEM)
117: b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */
118: b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
119: PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL);
120: PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL);
121: #endif
122: #endif
123: b->vscat.from_n = -1;
124: b->vscat.to_n = -1;
125: b->vscat.unit = MPIU_SCALAR;
126: *sf = b;
127: return(0);
128: }
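/*
   Example (illustrative sketch, not part of the original sf.c): the typical PetscSF life cycle.
   Error checking (ierr/CHKERRQ) is omitted, and nroots/nleaves/iremote stand for application-provided
   graph data.

     PetscSF sf;
     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     PetscSFSetFromOptions(sf);                      // e.g. -sf_type basic|window|neighbor
     PetscSFSetGraph(sf,nroots,nleaves,NULL,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);
     PetscSFSetUp(sf);                               // optional; communication calls set up lazily
     // ... PetscSFBcastBegin/End(), PetscSFReduceBegin/End(), ...
     PetscSFDestroy(&sf);
*/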
130: /*@
131: PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
133: Collective
135: Input Arguments:
136: . sf - star forest
138: Level: advanced
140: .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
141: @*/
142: PetscErrorCode PetscSFReset(PetscSF sf)
143: {
148: if (sf->ops->Reset) {(*sf->ops->Reset)(sf);}
149: sf->nroots = -1;
150: sf->nleaves = -1;
151: sf->minleaf = PETSC_MAX_INT;
152: sf->maxleaf = PETSC_MIN_INT;
153: sf->mine = NULL;
154: sf->remote = NULL;
155: sf->graphset = PETSC_FALSE;
156: PetscFree(sf->mine_alloc);
157: PetscFree(sf->remote_alloc);
158: sf->nranks = -1;
159: PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);
160: sf->degreeknown = PETSC_FALSE;
161: PetscFree(sf->degree);
162: if (sf->ingroup != MPI_GROUP_NULL) {MPI_Group_free(&sf->ingroup);}
163: if (sf->outgroup != MPI_GROUP_NULL) {MPI_Group_free(&sf->outgroup);}
164: if (sf->multi) sf->multi->multi = NULL;
165: PetscSFDestroy(&sf->multi);
166: PetscLayoutDestroy(&sf->map);
168: #if defined(PETSC_HAVE_DEVICE)
169: for (PetscInt i=0; i<2; i++) {PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);}
170: #endif
172: sf->setupcalled = PETSC_FALSE;
173: return(0);
174: }
176: /*@C
177: PetscSFSetType - Set the PetscSF communication implementation
179: Collective on PetscSF
181: Input Parameters:
182: + sf - the PetscSF context
183: - type - a known method
185: Options Database Key:
186: . -sf_type <type> - Sets the method; use -help for a list
187: of available methods (for instance, window, basic, neighbor)
189: Notes:
190: See "include/petscsf.h" for available methods, for instance
191: + PETSCSFWINDOW - MPI-2/3 one-sided
192: - PETSCSFBASIC - basic implementation using MPI-1 two-sided
194: Level: intermediate
196: .seealso: PetscSFType, PetscSFCreate()
197: @*/
198: PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
199: {
200: PetscErrorCode ierr,(*r)(PetscSF);
201: PetscBool match;
207: PetscObjectTypeCompare((PetscObject)sf,type,&match);
208: if (match) return(0);
210: PetscFunctionListFind(PetscSFList,type,&r);
211: if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
212: /* Destroy the previous PetscSF implementation context */
213: if (sf->ops->Destroy) {(*(sf)->ops->Destroy)(sf);}
214: PetscMemzero(sf->ops,sizeof(*sf->ops));
215: PetscObjectChangeTypeName((PetscObject)sf,type);
216: (*r)(sf);
217: return(0);
218: }
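/*
   Example (illustrative sketch, not part of the original sf.c): selecting an implementation
   explicitly instead of via -sf_type. PETSCSFWINDOW and PETSCSFBASIC are the types listed in the
   notes above; sf is assumed to come from PetscSFCreate().

     PetscSFSetType(sf,PETSCSFWINDOW);   // use MPI one-sided windows
     // or, the default two-sided implementation:
     PetscSFSetType(sf,PETSCSFBASIC);
*/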
220: /*@C
221: PetscSFGetType - Get the PetscSF communication implementation
223: Not Collective
225: Input Parameter:
226: . sf - the PetscSF context
228: Output Parameter:
229: . type - the PetscSF type name
231: Level: intermediate
233: .seealso: PetscSFSetType(), PetscSFCreate()
234: @*/
235: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
236: {
240: *type = ((PetscObject)sf)->type_name;
241: return(0);
242: }
244: /*@C
245: PetscSFDestroy - destroy star forest
247: Collective
249: Input Arguments:
250: . sf - address of star forest
252: Level: intermediate
254: .seealso: PetscSFCreate(), PetscSFReset()
255: @*/
256: PetscErrorCode PetscSFDestroy(PetscSF *sf)
257: {
261: if (!*sf) return(0);
263: if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; return(0);}
264: PetscSFReset(*sf);
265: if ((*sf)->ops->Destroy) {(*(*sf)->ops->Destroy)(*sf);}
266: PetscSFDestroy(&(*sf)->vscat.lsf);
267: if ((*sf)->vscat.bs > 1) {MPI_Type_free(&(*sf)->vscat.unit);}
268: PetscHeaderDestroy(sf);
269: return(0);
270: }
272: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
273: {
274: PetscInt i, nleaves;
275: PetscMPIInt size;
276: const PetscInt *ilocal;
277: const PetscSFNode *iremote;
278: PetscErrorCode ierr;
281: if (!sf->graphset || !PetscDefined(USE_DEBUG)) return(0);
282: PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);
283: MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
284: for (i = 0; i < nleaves; i++) {
285: const PetscInt rank = iremote[i].rank;
286: const PetscInt remote = iremote[i].index;
287: const PetscInt leaf = ilocal ? ilocal[i] : i;
288: if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
289: if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
290: if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
291: }
292: return(0);
293: }
295: /*@
296: PetscSFSetUp - set up communication structures
298: Collective
300: Input Arguments:
301: . sf - star forest communication object
303: Level: beginner
305: .seealso: PetscSFSetFromOptions(), PetscSFSetType()
306: @*/
307: PetscErrorCode PetscSFSetUp(PetscSF sf)
308: {
313: PetscSFCheckGraphSet(sf,1);
314: if (sf->setupcalled) return(0);
315: PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);
316: PetscSFCheckGraphValid_Private(sf);
317: if (!((PetscObject)sf)->type_name) {PetscSFSetType(sf,PETSCSFBASIC);} /* Zero all sf->ops */
318: if (sf->ops->SetUp) {(*sf->ops->SetUp)(sf);}
319: #if defined(PETSC_HAVE_CUDA)
320: if (sf->backend == PETSCSF_BACKEND_CUDA) {
321: sf->ops->Malloc = PetscSFMalloc_CUDA;
322: sf->ops->Free = PetscSFFree_CUDA;
323: }
324: #endif
325: #if defined(PETSC_HAVE_HIP)
326: if (sf->backend == PETSCSF_BACKEND_HIP) {
327: sf->ops->Malloc = PetscSFMalloc_HIP;
328: sf->ops->Free = PetscSFFree_HIP;
329: }
330: #endif
333: #if defined(PETSC_HAVE_KOKKOS)
334: if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
335: sf->ops->Malloc = PetscSFMalloc_Kokkos;
336: sf->ops->Free = PetscSFFree_Kokkos;
337: }
338: #endif
339: PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);
340: sf->setupcalled = PETSC_TRUE;
341: return(0);
342: }
344: /*@
345: PetscSFSetFromOptions - set PetscSF options using the options database
347: Logically Collective
349: Input Arguments:
350: . sf - star forest
352: Options Database Keys:
353: + -sf_type - implementation type, see PetscSFSetType()
354: . -sf_rank_order - sort composite points for gathers and scatters in rank order; otherwise gathers are non-deterministic
355: . -sf_unknown_input_stream - Assume the input root/leafdata is computed on arbitrary streams unknown to SF, so SF cannot assume the
356: data is ready on its default stream and extra stream synchronization is needed before SF processes it (default: false).
357: This option only takes effect with -use_gpu_aware_mpi 1.
358: . -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
359: This option only takes effect with -use_gpu_aware_mpi 1.
361: - -sf_backend cuda | hip | kokkos - Select the device backend SF uses. Currently SF has these backends: cuda, hip and kokkos.
362: On CUDA (HIP) devices, one can choose cuda (hip) or kokkos, with kokkos being the default when PETSc is built with Kokkos. On other devices,
363: kokkos is the only available backend.
365: Level: intermediate
366: @*/
367: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
368: {
369: PetscSFType deft;
370: char type[256];
372: PetscBool flg;
376: PetscObjectOptionsBegin((PetscObject)sf);
377: deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
378: PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);
379: PetscSFSetType(sf,flg ? type : deft);
380: PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);
381: #if defined(PETSC_HAVE_DEVICE)
382: {
383: char backendstr[32] = {0};
384: PetscBool isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
385: /* Change the defaults set in PetscSFCreate() with command line options */
386: PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitrary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL);
387: PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);
388: PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);
389: PetscStrcasecmp("cuda",backendstr,&isCuda);
390: PetscStrcasecmp("kokkos",backendstr,&isKokkos);
391: PetscStrcasecmp("hip",backendstr,&isHip);
392: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
393: if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
394: else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
395: else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
396: else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
397: #elif defined(PETSC_HAVE_KOKKOS)
398: if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
399: #endif
400: }
401: #endif
402: if (sf->ops->SetFromOptions) {(*sf->ops->SetFromOptions)(PetscOptionsObject,sf);}
403: PetscOptionsEnd();
404: return(0);
405: }
407: /*@
408: PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
410: Logically Collective
412: Input Arguments:
413: + sf - star forest
414: - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
416: Level: advanced
418: .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
419: @*/
420: PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
421: {
425: if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
426: sf->rankorder = flg;
427: return(0);
428: }
430: /*@
431: PetscSFSetGraph - Set a parallel star forest
433: Collective
435: Input Arguments:
436: + sf - star forest
437: . nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
438: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
439: . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
440: during setup in debug mode)
441: . localmode - copy mode for ilocal
442: . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
443: during setup in debug mode)
444: - remotemode - copy mode for iremote
446: Level: intermediate
448: Notes:
449: In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode
451: Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
452: encode contiguous storage. In that case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
453: needed.
455: Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
456: indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).
458: .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
459: @*/
460: PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
461: {
468: if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
469: if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);
471: if (sf->nroots >= 0) { /* Reset only if graph already set */
472: PetscSFReset(sf);
473: }
475: PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);
477: sf->nroots = nroots;
478: sf->nleaves = nleaves;
480: if (nleaves && ilocal) {
481: PetscInt i;
482: PetscInt minleaf = PETSC_MAX_INT;
483: PetscInt maxleaf = PETSC_MIN_INT;
484: int contiguous = 1;
485: for (i=0; i<nleaves; i++) {
486: minleaf = PetscMin(minleaf,ilocal[i]);
487: maxleaf = PetscMax(maxleaf,ilocal[i]);
488: contiguous &= (ilocal[i] == i);
489: }
490: sf->minleaf = minleaf;
491: sf->maxleaf = maxleaf;
492: if (contiguous) {
493: if (localmode == PETSC_OWN_POINTER) {
494: PetscFree(ilocal);
495: }
496: ilocal = NULL;
497: }
498: } else {
499: sf->minleaf = 0;
500: sf->maxleaf = nleaves - 1;
501: }
503: if (ilocal) {
504: switch (localmode) {
505: case PETSC_COPY_VALUES:
506: PetscMalloc1(nleaves,&sf->mine_alloc);
507: PetscArraycpy(sf->mine_alloc,ilocal,nleaves);
508: sf->mine = sf->mine_alloc;
509: break;
510: case PETSC_OWN_POINTER:
511: sf->mine_alloc = (PetscInt*)ilocal;
512: sf->mine = sf->mine_alloc;
513: break;
514: case PETSC_USE_POINTER:
515: sf->mine_alloc = NULL;
516: sf->mine = (PetscInt*)ilocal;
517: break;
518: default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
519: }
520: }
522: switch (remotemode) {
523: case PETSC_COPY_VALUES:
524: PetscMalloc1(nleaves,&sf->remote_alloc);
525: PetscArraycpy(sf->remote_alloc,iremote,nleaves);
526: sf->remote = sf->remote_alloc;
527: break;
528: case PETSC_OWN_POINTER:
529: sf->remote_alloc = (PetscSFNode*)iremote;
530: sf->remote = sf->remote_alloc;
531: break;
532: case PETSC_USE_POINTER:
533: sf->remote_alloc = NULL;
534: sf->remote = (PetscSFNode*)iremote;
535: break;
536: default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
537: }
539: PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);
540: sf->graphset = PETSC_TRUE;
541: return(0);
542: }
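/*
   Example (illustrative sketch, not part of the original sf.c): each rank owns two roots and has
   two contiguously stored leaves (ilocal = NULL); leaf i references root i on the next rank.
   Error checking is omitted and sf is assumed to come from PetscSFCreate().

     PetscMPIInt rank,size;
     PetscSFNode iremote[2];
     PetscInt    i;
     MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
     MPI_Comm_size(PETSC_COMM_WORLD,&size);
     for (i=0; i<2; i++) {
       iremote[i].rank  = (rank+1)%size;  // rank owning the referenced root
       iremote[i].index = i;              // root index on that rank
     }
     PetscSFSetGraph(sf,2,2,NULL,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);
*/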
544: /*@
545: PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern
547: Collective
549: Input Parameters:
550: + sf - The PetscSF
551: . map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
552: - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL
554: Notes:
555: It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
556: n and N are local and global sizes of x respectively.
558: With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph such that a Bcast on it copies x to a
559: sequential vector y on every rank.
561: With PETSCSF_PATTERN_GATHER, the routine creates a graph such that a Bcast on it copies x to a
562: sequential vector y on rank 0.
564: In the above cases, entries of x are roots and entries of y are leaves.
566: With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is the size of sf's communicator. The routine
567: creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
568: of rank j, for 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount equal to 1. This does
569: not mean one cannot send multiple items; one just needs to create a new MPI datatype for the multiple data
570: items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.
572: In this case, roots and leaves are symmetric.
574: Level: intermediate
575: @*/
576: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
577: {
578: MPI_Comm comm;
579: PetscInt n,N,res[2];
580: PetscMPIInt rank,size;
581: PetscSFType type;
585: PetscObjectGetComm((PetscObject)sf, &comm);
586: if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
587: MPI_Comm_rank(comm,&rank);
588: MPI_Comm_size(comm,&size);
590: if (pattern == PETSCSF_PATTERN_ALLTOALL) {
591: type = PETSCSFALLTOALL;
592: PetscLayoutCreate(comm,&sf->map);
593: PetscLayoutSetLocalSize(sf->map,size);
594: PetscLayoutSetSize(sf->map,((PetscInt)size)*size);
595: PetscLayoutSetUp(sf->map);
596: } else {
597: PetscLayoutGetLocalSize(map,&n);
598: PetscLayoutGetSize(map,&N);
599: res[0] = n;
600: res[1] = -n;
601: /* Check if n are same over all ranks so that we can optimize it */
602: MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);
603: if (res[0] == -res[1]) { /* same n */
604: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
605: } else {
606: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
607: }
608: PetscLayoutReference(map,&sf->map);
609: }
610: PetscSFSetType(sf,type);
612: sf->pattern = pattern;
613: sf->mine = NULL; /* Contiguous */
615: /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
616: Also set other easy stuff.
617: */
618: if (pattern == PETSCSF_PATTERN_ALLGATHER) {
619: sf->nleaves = N;
620: sf->nroots = n;
621: sf->nranks = size;
622: sf->minleaf = 0;
623: sf->maxleaf = N - 1;
624: } else if (pattern == PETSCSF_PATTERN_GATHER) {
625: sf->nleaves = rank ? 0 : N;
626: sf->nroots = n;
627: sf->nranks = rank ? 0 : size;
628: sf->minleaf = 0;
629: sf->maxleaf = rank ? -1 : N - 1;
630: } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
631: sf->nleaves = size;
632: sf->nroots = size;
633: sf->nranks = size;
634: sf->minleaf = 0;
635: sf->maxleaf = size - 1;
636: }
637: sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
638: sf->graphset = PETSC_TRUE;
639: return(0);
640: }
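/*
   Example (illustrative sketch, not part of the original sf.c): an ALLGATHER-patterned SF built
   from the layout map of a distributed vector x, so that a Bcast copies the n local roots into a
   full-length array y of size N on every rank. xlocal and y are hypothetical application arrays;
   error checking is omitted.

     PetscSF sf;
     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     PetscSFSetGraphWithPattern(sf,map,PETSCSF_PATTERN_ALLGATHER);
     PetscSFBcastBegin(sf,MPIU_SCALAR,xlocal,y,MPI_REPLACE);
     PetscSFBcastEnd(sf,MPIU_SCALAR,xlocal,y,MPI_REPLACE);
     PetscSFDestroy(&sf);
*/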
642: /*@
643: PetscSFCreateInverseSF - given a PetscSF in which all roots have degree 1, creates the inverse map
645: Collective
647: Input Arguments:
649: . sf - star forest to invert
651: Output Arguments:
652: . isf - inverse of sf
653: Level: advanced
655: Notes:
656: All roots must have degree 1.
658: The local space may be a permutation, but cannot be sparse.
660: .seealso: PetscSFSetGraph()
661: @*/
662: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
663: {
665: PetscMPIInt rank;
666: PetscInt i,nroots,nleaves,maxlocal,count,*newilocal;
667: const PetscInt *ilocal;
668: PetscSFNode *roots,*leaves;
672: PetscSFCheckGraphSet(sf,1);
675: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);
676: maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
678: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
679: PetscMalloc2(nroots,&roots,maxlocal,&leaves);
680: for (i=0; i<maxlocal; i++) {
681: leaves[i].rank = rank;
682: leaves[i].index = i;
683: }
684: for (i=0; i <nroots; i++) {
685: roots[i].rank = -1;
686: roots[i].index = -1;
687: }
688: PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);
689: PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);
691: /* Check whether our leaves are sparse */
692: for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
693: if (count == nroots) newilocal = NULL;
694: else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
695: PetscMalloc1(count,&newilocal);
696: for (i=0,count=0; i<nroots; i++) {
697: if (roots[i].rank >= 0) {
698: newilocal[count] = i;
699: roots[count].rank = roots[i].rank;
700: roots[count].index = roots[i].index;
701: count++;
702: }
703: }
704: }
706: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);
707: PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);
708: PetscFree2(roots,leaves);
709: return(0);
710: }
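/*
   Example (illustrative sketch, not part of the original sf.c): inverting an SF whose roots all
   have degree 1, so that data can afterwards be communicated in the opposite direction (former
   roots become leaves and vice versa).

     PetscSF isf;
     PetscSFCreateInverseSF(sf,&isf);
     // e.g. PetscSFBcastBegin/End() on isf pushes values from sf's leaf space to sf's root space
     PetscSFDestroy(&isf);
*/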
712: /*@
713: PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
715: Collective
717: Input Arguments:
718: + sf - communication object to duplicate
719: - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
721: Output Arguments:
722: . newsf - new communication object
724: Level: beginner
726: .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
727: @*/
728: PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
729: {
730: PetscSFType type;
732: MPI_Datatype dtype=MPIU_SCALAR;
738: PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);
739: PetscSFGetType(sf,&type);
740: if (type) {PetscSFSetType(*newsf,type);}
741: if (opt == PETSCSF_DUPLICATE_GRAPH) {
742: PetscSFCheckGraphSet(sf,1);
743: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
744: PetscInt nroots,nleaves;
745: const PetscInt *ilocal;
746: const PetscSFNode *iremote;
747: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
748: PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);
749: } else {
750: PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);
751: }
752: }
753: /* Since oldtype is committed, so is newtype, according to MPI */
754: if (sf->vscat.bs > 1) {MPI_Type_dup(sf->vscat.unit,&dtype);}
755: (*newsf)->vscat.bs = sf->vscat.bs;
756: (*newsf)->vscat.unit = dtype;
757: (*newsf)->vscat.to_n = sf->vscat.to_n;
758: (*newsf)->vscat.from_n = sf->vscat.from_n;
759: /* Do not copy lsf. Build it on demand since it is rarely used */
761: #if defined(PETSC_HAVE_DEVICE)
762: (*newsf)->backend = sf->backend;
763: (*newsf)->unknown_input_stream= sf->unknown_input_stream;
764: (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi;
765: (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
766: #endif
767: if (sf->ops->Duplicate) {(*sf->ops->Duplicate)(sf,opt,*newsf);}
768: /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
769: return(0);
770: }
772: /*@C
773: PetscSFGetGraph - Get the graph specifying a parallel star forest
775: Not Collective
777: Input Arguments:
778: . sf - star forest
780: Output Arguments:
781: + nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
782: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
783: . ilocal - locations of leaves in leafdata buffers
784: - iremote - remote locations of root vertices for each leaf on the current process
786: Notes:
787: The graph is not currently required to have been set; if it has not, nroots is returned as -1
789: When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
790: want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.
792: Level: intermediate
794: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
795: @*/
796: PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
797: {
802: if (sf->ops->GetGraph) {
803: (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);
804: } else {
805: if (nroots) *nroots = sf->nroots;
806: if (nleaves) *nleaves = sf->nleaves;
807: if (ilocal) *ilocal = sf->mine;
808: if (iremote) *iremote = sf->remote;
809: }
810: return(0);
811: }
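/*
   Example (illustrative sketch, not part of the original sf.c): inspecting the graph of an
   existing SF. In C the returned arrays are borrowed references owned by the SF and must not be
   freed; a NULL ilocal means the leaves are stored contiguously.

     PetscInt          nroots,nleaves,i;
     const PetscInt    *ilocal;
     const PetscSFNode *iremote;
     PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
     for (i=0; i<nleaves; i++) {
       PetscInt leaf = ilocal ? ilocal[i] : i;
       PetscPrintf(PETSC_COMM_SELF,"leaf %D <- (%D,%D)\n",leaf,iremote[i].rank,iremote[i].index);
     }
*/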
813: /*@
814: PetscSFGetLeafRange - Get the active leaf ranges
816: Not Collective
818: Input Arguments:
819: . sf - star forest
821: Output Arguments:
822: + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
823: - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.
825: Level: developer
827: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
828: @*/
829: PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
830: {
833: PetscSFCheckGraphSet(sf,1);
834: if (minleaf) *minleaf = sf->minleaf;
835: if (maxleaf) *maxleaf = sf->maxleaf;
836: return(0);
837: }
839: /*@C
840: PetscSFViewFromOptions - View a PetscSF based on values in the options database
842: Collective on PetscSF
844: Input Parameters:
845: + A - the star forest
846: . obj - Optional object
847: - name - command line option
849: Level: intermediate
850: .seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
851: @*/
852: PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
853: {
858: PetscObjectViewFromOptions((PetscObject)A,obj,name);
859: return(0);
860: }
862: /*@C
863: PetscSFView - view a star forest
865: Collective
867: Input Arguments:
868: + sf - star forest
869: - viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
871: Level: beginner
873: .seealso: PetscSFCreate(), PetscSFSetGraph()
874: @*/
875: PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
876: {
877: PetscErrorCode ierr;
878: PetscBool iascii;
879: PetscViewerFormat format;
883: if (!viewer) {PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);}
886: if (sf->graphset) {PetscSFSetUp(sf);}
887: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
888: if (iascii) {
889: PetscMPIInt rank;
890: PetscInt ii,i,j;
892: PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);
893: PetscViewerASCIIPushTab(viewer);
894: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
895: if (!sf->graphset) {
896: PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");
897: PetscViewerASCIIPopTab(viewer);
898: return(0);
899: }
900: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
901: PetscViewerASCIIPushSynchronized(viewer);
902: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);
903: for (i=0; i<sf->nleaves; i++) {
904: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);
905: }
906: PetscViewerFlush(viewer);
907: PetscViewerGetFormat(viewer,&format);
908: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
909: PetscMPIInt *tmpranks,*perm;
910: PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);
911: PetscArraycpy(tmpranks,sf->ranks,sf->nranks);
912: for (i=0; i<sf->nranks; i++) perm[i] = i;
913: PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);
914: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);
915: for (ii=0; ii<sf->nranks; ii++) {
916: i = perm[ii];
917: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);
918: for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
919: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);
920: }
921: }
922: PetscFree2(tmpranks,perm);
923: }
924: PetscViewerFlush(viewer);
925: PetscViewerASCIIPopSynchronized(viewer);
926: }
927: PetscViewerASCIIPopTab(viewer);
928: }
929: if (sf->ops->View) {(*sf->ops->View)(sf,viewer);}
930: return(0);
931: }
933: /*@C
934: PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
936: Not Collective
938: Input Arguments:
939: . sf - star forest
941: Output Arguments:
942: + nranks - number of ranks referenced by local part
943: . ranks - array of ranks
944: . roffset - offset in rmine/rremote for each rank (length nranks+1)
945: . rmine - concatenated array holding local indices referencing each remote rank
946: - rremote - concatenated array holding remote indices referenced for each remote rank
948: Level: developer
950: .seealso: PetscSFGetLeafRanks()
951: @*/
952: PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
953: {
958: if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
959: if (sf->ops->GetRootRanks) {
960: (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);
961: } else {
962: /* The generic implementation */
963: if (nranks) *nranks = sf->nranks;
964: if (ranks) *ranks = sf->ranks;
965: if (roffset) *roffset = sf->roffset;
966: if (rmine) *rmine = sf->rmine;
967: if (rremote) *rremote = sf->rremote;
968: }
969: return(0);
970: }
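/*
   Example (illustrative sketch, not part of the original sf.c): walking the per-rank buckets
   returned by PetscSFGetRootRanks(). For each remote rank ranks[i], the slice
   [roffset[i],roffset[i+1]) of rmine[]/rremote[] lists the local leaves and the root indices they
   reference on that rank. PetscSFSetUp() must have been called first.

     PetscInt          nranks,i,j;
     const PetscMPIInt *ranks;
     const PetscInt    *roffset,*rmine,*rremote;
     PetscSFGetRootRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);
     for (i=0; i<nranks; i++) {
       for (j=roffset[i]; j<roffset[i+1]; j++) {
         // local leaf rmine[j] references root rremote[j] on rank ranks[i]
       }
     }
*/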
972: /*@C
973: PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
975: Not Collective
977: Input Arguments:
978: . sf - star forest
980: Output Arguments:
981: + niranks - number of leaf ranks referencing roots on this process
982: . iranks - array of ranks
983: . ioffset - offset in irootloc for each rank (length niranks+1)
984: - irootloc - concatenated array holding local indices of roots referenced by each leaf rank
986: Level: developer
988: .seealso: PetscSFGetRootRanks()
989: @*/
990: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
991: {
996: if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
997: if (sf->ops->GetLeafRanks) {
998: (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);
999: } else {
1000: PetscSFType type;
1001: PetscSFGetType(sf,&type);
1002: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
1003: }
1004: return(0);
1005: }
1007: static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
1008: PetscInt i;
1009: for (i=0; i<n; i++) {
1010: if (needle == list[i]) return PETSC_TRUE;
1011: }
1012: return PETSC_FALSE;
1013: }
1015: /*@C
1016: PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
1018: Collective
1020: Input Arguments:
1021: + sf - PetscSF to set up; PetscSFSetGraph() must have been called
1022: - dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)
1024: Level: developer
1026: .seealso: PetscSFGetRootRanks()
1027: @*/
1028: PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
1029: {
1030: PetscErrorCode ierr;
1031: PetscTable table;
1032: PetscTablePosition pos;
1033: PetscMPIInt size,groupsize,*groupranks;
1034: PetscInt *rcount,*ranks;
1035: PetscInt i, irank = -1,orank = -1;
1039: PetscSFCheckGraphSet(sf,1);
1040: MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
1041: PetscTableCreate(10,size,&table);
1042: for (i=0; i<sf->nleaves; i++) {
1043: /* Log 1-based rank */
1044: PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);
1045: }
1046: PetscTableGetCount(table,&sf->nranks);
1047: PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);
1048: PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);
1049: PetscTableGetHeadPosition(table,&pos);
1050: for (i=0; i<sf->nranks; i++) {
1051: PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);
1052: ranks[i]--; /* Convert back to 0-based */
1053: }
1054: PetscTableDestroy(&table);
1056: /* We expect that dgroup is reliably "small" while nranks could be large */
1057: {
1058: MPI_Group group = MPI_GROUP_NULL;
1059: PetscMPIInt *dgroupranks;
1060: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1061: MPI_Group_size(dgroup,&groupsize);
1062: PetscMalloc1(groupsize,&dgroupranks);
1063: PetscMalloc1(groupsize,&groupranks);
1064: for (i=0; i<groupsize; i++) dgroupranks[i] = i;
1065: if (groupsize) {MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);}
1066: MPI_Group_free(&group);
1067: PetscFree(dgroupranks);
1068: }
1070: /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1071: for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
1072: for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
1073: if (InList(ranks[i],groupsize,groupranks)) break;
1074: }
1075: for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1076: if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
1077: }
1078: if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1079: PetscInt tmprank,tmpcount;
1081: tmprank = ranks[i];
1082: tmpcount = rcount[i];
1083: ranks[i] = ranks[sf->ndranks];
1084: rcount[i] = rcount[sf->ndranks];
1085: ranks[sf->ndranks] = tmprank;
1086: rcount[sf->ndranks] = tmpcount;
1087: sf->ndranks++;
1088: }
1089: }
1090: PetscFree(groupranks);
1091: PetscSortIntWithArray(sf->ndranks,ranks,rcount);
1092: PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);
1093: sf->roffset[0] = 0;
1094: for (i=0; i<sf->nranks; i++) {
1095: PetscMPIIntCast(ranks[i],sf->ranks+i);
1096: sf->roffset[i+1] = sf->roffset[i] + rcount[i];
1097: rcount[i] = 0;
1098: }
1099: for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
1100: /* short circuit */
1101: if (orank != sf->remote[i].rank) {
1102: /* Search for index of iremote[i].rank in sf->ranks */
1103: PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);
1104: if (irank < 0) {
1105: PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);
1106: if (irank >= 0) irank += sf->ndranks;
1107: }
1108: orank = sf->remote[i].rank;
1109: }
1110: if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
1111: sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i;
1112: sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1113: rcount[irank]++;
1114: }
1115: PetscFree2(rcount,ranks);
1116: return(0);
1117: }
1119: /*@C
1120: PetscSFGetGroups - gets incoming and outgoing process groups
1122: Collective
1124: Input Argument:
1125: . sf - star forest
1127: Output Arguments:
1128: + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1129: - outgoing - group of destination processes for outgoing edges (roots that I reference)
1131: Level: developer
1133: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
1134: @*/
1135: PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
1136: {
1138: MPI_Group group = MPI_GROUP_NULL;
1141: if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
1142: if (sf->ingroup == MPI_GROUP_NULL) {
1143: PetscInt i;
1144: const PetscInt *indegree;
1145: PetscMPIInt rank,*outranks,*inranks;
1146: PetscSFNode *remote;
1147: PetscSF bgcount;
1149: /* Compute the number of incoming ranks */
1150: PetscMalloc1(sf->nranks,&remote);
1151: for (i=0; i<sf->nranks; i++) {
1152: remote[i].rank = sf->ranks[i];
1153: remote[i].index = 0;
1154: }
1155: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);
1156: PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1157: PetscSFComputeDegreeBegin(bgcount,&indegree);
1158: PetscSFComputeDegreeEnd(bgcount,&indegree);
1159: /* Enumerate the incoming ranks */
1160: PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);
1161: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1162: for (i=0; i<sf->nranks; i++) outranks[i] = rank;
1163: PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);
1164: PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);
1165: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1166: MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);
1167: MPI_Group_free(&group);
1168: PetscFree2(inranks,outranks);
1169: PetscSFDestroy(&bgcount);
1170: }
1171: *incoming = sf->ingroup;
1173: if (sf->outgroup == MPI_GROUP_NULL) {
1174: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1175: MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);
1176: MPI_Group_free(&group);
1177: }
1178: *outgoing = sf->outgroup;
1179: return(0);
1180: }
1182: /*@
1183: PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
1185: Collective
1187: Input Argument:
1188: . sf - star forest that may contain roots with 0 or with more than 1 vertex
1190: Output Arguments:
1191: . multi - star forest with split roots, such that each root has degree exactly 1
1193: Level: developer
1195: Notes:
1197: In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1198: directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1199: edge, it is a candidate for future optimization that might involve its removal.
1201: .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1202: @*/
1203: PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
1204: {
1210: if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1211: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1212: *multi = sf->multi;
1213: sf->multi->multi = sf->multi;
1214: return(0);
1215: }
1216: if (!sf->multi) {
1217: const PetscInt *indegree;
1218: PetscInt i,*inoffset,*outones,*outoffset,maxlocal;
1219: PetscSFNode *remote;
1220: maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
1221: PetscSFComputeDegreeBegin(sf,&indegree);
1222: PetscSFComputeDegreeEnd(sf,&indegree);
1223: PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);
1224: inoffset[0] = 0;
1225: for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
1226: for (i=0; i<maxlocal; i++) outones[i] = 1;
1227: PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1228: PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1229: for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1230: if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
1231: for (i=0; i<sf->nroots; i++) {
1232: if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
1233: }
1234: }
1235: PetscMalloc1(sf->nleaves,&remote);
1236: for (i=0; i<sf->nleaves; i++) {
1237: remote[i].rank = sf->remote[i].rank;
1238: remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1239: }
1240: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1241: sf->multi->multi = sf->multi;
1242: PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1243: if (sf->rankorder) { /* Sort the ranks */
1244: PetscMPIInt rank;
1245: PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
1246: PetscSFNode *newremote;
1247: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1248: for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
1249: PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);
1250: for (i=0; i<maxlocal; i++) outranks[i] = rank;
1251: PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);
1252: PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);
1253: /* Sort the incoming ranks at each vertex, build the inverse map */
1254: for (i=0; i<sf->nroots; i++) {
1255: PetscInt j;
1256: for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
1257: PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);
1258: for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1259: }
1260: PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);
1261: PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);
1262: PetscMalloc1(sf->nleaves,&newremote);
1263: for (i=0; i<sf->nleaves; i++) {
1264: newremote[i].rank = sf->remote[i].rank;
1265: newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1266: }
1267: PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);
1268: PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);
1269: }
1270: PetscFree3(inoffset,outones,outoffset);
1271: }
1272: *multi = sf->multi;
1273: return(0);
1274: }
1276: /*@C
1277: PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices
1279: Collective
1281: Input Arguments:
1282: + sf - original star forest
1283: . nselected - number of selected roots on this process
1284: - selected - indices of the selected roots on this process
1286: Output Arguments:
1287: . esf - new star forest
1289: Level: advanced
1291: Note:
1292: To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1293: be done by calling PetscSFGetGraph().
1295: .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1296: @*/
1297: PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1298: {
1299: PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1300: const PetscInt *ilocal;
1301: signed char *rootdata,*leafdata,*leafmem;
1302: const PetscSFNode *iremote;
1303: PetscSFNode *new_iremote;
1304: MPI_Comm comm;
1305: PetscErrorCode ierr;
1309: PetscSFCheckGraphSet(sf,1);
1313: PetscSFSetUp(sf);
1314: PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);
1315: PetscObjectGetComm((PetscObject)sf,&comm);
1316: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
1318: if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1319: PetscBool dups;
1320: PetscCheckDupsInt(nselected,selected,&dups); /* check selected[] for duplicate entries (assumed call; sets dups tested below) */
1321: if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1322: for (i=0; i<nselected; i++)
1323: if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root index %D is out of [0,%D)",selected[i],nroots);
1324: }
1326: if (sf->ops->CreateEmbeddedRootSF) {
1327: (*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf);
1328: } else {
1329: /* A generic version of creating embedded sf */
1330: PetscSFGetLeafRange(sf,&minleaf,&maxleaf);
1331: maxlocal = maxleaf - minleaf + 1;
1332: PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);
1333: leafdata = leafmem - minleaf;
1334: /* Tag selected roots and bcast to leaves */
1335: for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1336: PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);
1337: PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);
1339: /* Build esf with leaves that are still connected */
1340: esf_nleaves = 0;
1341: for (i=0; i<nleaves; i++) {
1342: j = ilocal ? ilocal[i] : i;
1343: /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1344: with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1345: */
1346: esf_nleaves += (leafdata[j] ? 1 : 0);
1347: }
1348: PetscMalloc1(esf_nleaves,&new_ilocal);
1349: PetscMalloc1(esf_nleaves,&new_iremote);
1350: for (i=n=0; i<nleaves; i++) {
1351: j = ilocal ? ilocal[i] : i;
1352: if (leafdata[j]) {
1353: new_ilocal[n] = j;
1354: new_iremote[n].rank = iremote[i].rank;
1355: new_iremote[n].index = iremote[i].index;
1356: ++n;
1357: }
1358: }
1359: PetscSFCreate(comm,esf);
1360: PetscSFSetFromOptions(*esf);
1361: PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1362: PetscFree2(rootdata,leafmem);
1363: }
1364: PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);
1365: return(0);
1366: }
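/*
   Example (illustrative sketch, not part of the original sf.c): keeping only the edges whose
   roots are listed in selected[]. Leaf indices are not remapped, so the embedded SF still
   addresses the original leafdata layout. The root indices below are hypothetical.

     const PetscInt selected[] = {0,3,5};  // roots on this rank to keep
     PetscSF        esf;
     PetscSFCreateEmbeddedRootSF(sf,3,selected,&esf);
     // communicate on esf as usual, then
     PetscSFDestroy(&esf);
*/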
1368: /*@C
1369: PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1371: Collective
1373: Input Arguments:
1374: + sf - original star forest
1375: . nselected - number of selected leaves on this process
1376: - selected - indices of the selected leaves on this process
1378: Output Arguments:
1379: . newsf - new star forest
1381: Level: advanced
1383: .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph()
1384: @*/
1385: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1386: {
1387: const PetscSFNode *iremote;
1388: PetscSFNode *new_iremote;
1389: const PetscInt *ilocal;
1390: PetscInt i,nroots,*leaves,*new_ilocal;
1391: MPI_Comm comm;
1392: PetscErrorCode ierr;
1396: PetscSFCheckGraphSet(sf,1);
1400: /* Uniq selected[] and put results in leaves[] */
1401: PetscObjectGetComm((PetscObject)sf,&comm);
1402: PetscMalloc1(nselected,&leaves);
1403: PetscArraycpy(leaves,selected,nselected);
1404: PetscSortedRemoveDupsInt(&nselected,leaves);
1405: if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);
1407: /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1408: if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1409: (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);
1410: } else {
1411: PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);
1412: PetscMalloc1(nselected,&new_ilocal);
1413: PetscMalloc1(nselected,&new_iremote);
1414: for (i=0; i<nselected; ++i) {
1415: const PetscInt l = leaves[i];
1416: new_ilocal[i] = ilocal ? ilocal[l] : l;
1417: new_iremote[i].rank = iremote[l].rank;
1418: new_iremote[i].index = iremote[l].index;
1419: }
1420: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);
1421: PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1422: }
1423: PetscFree(leaves);
1424: return(0);
1425: }
1427: /*@C
1428: PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()
1430: Collective on PetscSF
1432: Input Arguments:
1433: + sf - star forest on which to communicate
1434: . unit - data type associated with each node
1435: . rootdata - buffer to broadcast
1436: - op - operation to use for reduction
1438: Output Arguments:
1439: . leafdata - buffer to be reduced with values from each leaf's respective root
1441: Level: intermediate
1443: Notes:
1444: When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1445: are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory types, you should
1446: use PetscSFBcastWithMemTypeBegin() instead.
1447: .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin()
1448: @*/
1449: PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1450: {
1452: PetscMemType rootmtype,leafmtype;
1456: PetscSFSetUp(sf);
1457: if (!sf->vscat.logging) {PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);}
1458: PetscGetMemType(rootdata,&rootmtype);
1459: PetscGetMemType(leafdata,&leafmtype);
1460: (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1461: if (!sf->vscat.logging) {PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);}
1462: return(0);
1463: }
1465: /*@C
1466: PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()
1468: Collective on PetscSF
1470: Input Arguments:
1471: + sf - star forest on which to communicate
1472: . unit - data type associated with each node
1473: . rootmtype - memory type of rootdata
1474: . rootdata - buffer to broadcast
1475: . leafmtype - memory type of leafdata
1476: - op - operation to use for reduction
1478: Output Arguments:
1479: . leafdata - buffer to be reduced with values from each leaf's respective root
1481: Level: intermediate
1483: .seealso: PetscSFBcastEnd(), PetscSFBcastBegin()
1484: @*/
1485: PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
1486: {
1491: PetscSFSetUp(sf);
1492: if (!sf->vscat.logging) {PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);}
1493: (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1494: if (!sf->vscat.logging) {PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);}
1495: return(0);
1496: }
1498: /*@C
1499: PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()
1501: Collective
1503: Input Arguments:
1504: + sf - star forest
1505: . unit - data type
1506: . rootdata - buffer to broadcast
1507: - op - operation to use for reduction
1509: Output Arguments:
1510: . leafdata - buffer to be reduced with values from each leaf's respective root
1512: Level: intermediate
1514: .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1515: @*/
1516: PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1517: {
1522: if (!sf->vscat.logging) {PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);}
1523: (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op);
1524: if (!sf->vscat.logging) {PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);}
1525: return(0);
1526: }
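/*
   Example (illustrative sketch, not part of the original sf.c): broadcasting root values to
   leaves with MPI_REPLACE, so every leaf receives a copy of its root. rootdata and leafdata are
   hypothetical application buffers; error checking is omitted.

     PetscSFBcastBegin(sf,MPIU_SCALAR,rootdata,leafdata,MPI_REPLACE);
     // unrelated work can overlap with the communication here
     PetscSFBcastEnd(sf,MPIU_SCALAR,rootdata,leafdata,MPI_REPLACE);
*/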
1528: /*@C
1529: PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1531: Collective
1533: Input Arguments:
1534: + sf - star forest
1535: . unit - data type
1536: . leafdata - values to reduce
1537: - op - reduction operation
1539: Output Arguments:
1540: . rootdata - result of reduction of values from all leaves of each root
1542: Level: intermediate
1544: Notes:
1545: When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1546: are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory types, you should
1547: use PetscSFReduceWithMemTypeBegin() instead.
1549: .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
1550: @*/
1551: PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1552: {
1554: PetscMemType rootmtype,leafmtype;
1558: PetscSFSetUp(sf);
1559: if (!sf->vscat.logging) {PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);}
1560: PetscGetMemType(rootdata,&rootmtype);
1561: PetscGetMemType(leafdata,&leafmtype);
1562: (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1563: if (!sf->vscat.logging) {PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);}
1564: return(0);
1565: }
1567: /*@C
1568: PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()
1570: Collective
1572: Input Arguments:
1573: + sf - star forest
1574: . unit - data type
1575: . leafmtype - memory type of leafdata
1576: . leafdata - values to reduce
1577: . rootmtype - memory type of rootdata
1578: - op - reduction operation
1580: Output Arguments:
1581: . rootdata - result of reduction of values from all leaves of each root
1583: Level: intermediate
1585: .seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
1586: @*/
1587: PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1588: {
1593: PetscSFSetUp(sf);
1594: if (!sf->vscat.logging) {PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);}
1595: (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1596: if (!sf->vscat.logging) {PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);}
1597: return(0);
1598: }
1600: /*@C
1601: PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1603: Collective
1605: Input Arguments:
1606: + sf - star forest
1607: . unit - data type
1608: . leafdata - values to reduce
1609: - op - reduction operation
1611: Output Arguments:
1612: . rootdata - result of reduction of values from all leaves of each root
1614: Level: intermediate
1616: .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1617: @*/
1618: PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1619: {
1624: if (!sf->vscat.logging) {PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);}
1625: (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);
1626: if (!sf->vscat.logging) {PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);}
1627: return(0);
1628: }
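/*
   Example (illustrative sketch, not part of the original sf.c): summing leaf contributions into
   their roots, e.g. accumulating ghost values back to their owners. leafdata and rootdata are
   hypothetical application buffers; error checking is omitted.

     PetscSFReduceBegin(sf,MPIU_SCALAR,leafdata,rootdata,MPI_SUM);
     PetscSFReduceEnd(sf,MPIU_SCALAR,leafdata,rootdata,MPI_SUM);
*/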
1630: /*@C
1631: PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1633: Collective
1635: Input Arguments:
1636: + sf - star forest
1637: . unit - data type
1638: . leafdata - leaf values to use in reduction
1639: - op - operation to use for reduction
1641: Output Arguments:
1642: + rootdata - root values to be updated, input state is seen by first process to perform an update
1643: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1645: Level: advanced
1647: Note:
1648: The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1649: might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1650: not guaranteed across the whole vertex. Therefore, this function is typically used only with primitive types such as
1651: integers.
1653: .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1654: @*/
1655: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1656: {
1658: PetscMemType rootmtype,leafmtype,leafupdatemtype;
1662: PetscSFSetUp(sf);
1663: PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1664: PetscGetMemType(rootdata,&rootmtype);
1665: PetscGetMemType(leafdata,&leafmtype);
1666: PetscGetMemType(leafupdate,&leafupdatemtype);
1667: if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1668: (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);
1669: PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1670: return(0);
1671: }
1673: /*@C
1674: PetscSFFetchAndOpEnd - end operation started in the matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update them atomically by applying the operation using my leaf value
1676: Collective
1678: Input Arguments:
1679: + sf - star forest
1680: . unit - data type
1681: . leafdata - leaf values to use in reduction
1682: - op - operation to use for reduction
1684: Output Arguments:
1685: + rootdata - root values to be updated; the input state is seen by the first process to perform an update
1686: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1688: Level: advanced
1690: .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1691: @*/
1692: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1693: {
1698: PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1699: (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);
1700: PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1701: return(0);
1702: }
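/* Example (not part of sf.c): a sketch of fetch-and-add with MPI_SUM, with an illustrative helper name.
   Each leaf contributes 1; afterwards rootdata[r] holds the degree of root r, and leafupdate[i] holds the
   running count at leaf i's root just before this leaf's atomic update, i.e. a distinct offset for every
   leaf connected to the same root. Assumes contiguous leaf storage (ilocal == NULL); error checking omitted. */
#include <petscsf.h>

PetscErrorCode ExampleFetchAndAdd(PetscSF sf)
{
  PetscInt nroots,nleaves,i,*rootdata,*leafdata,*leafupdate;

  PetscSFGetGraph(sf,&nroots,&nleaves,NULL,NULL);
  PetscMalloc3(nroots,&rootdata,nleaves,&leafdata,nleaves,&leafupdate);
  for (i=0; i<nroots;  i++) rootdata[i] = 0;
  for (i=0; i<nleaves; i++) leafdata[i] = 1;
  PetscSFFetchAndOpBegin(sf,MPIU_INT,rootdata,leafdata,leafupdate,MPI_SUM);
  PetscSFFetchAndOpEnd(sf,MPIU_INT,rootdata,leafdata,leafupdate,MPI_SUM);
  PetscFree3(rootdata,leafdata,leafupdate);
  return(0);
}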
1704: /*@C
1705: PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()
1707: Collective
1709: Input Arguments:
1710: . sf - star forest
1712: Output Arguments:
1713: . degree - degree of each root vertex
1715: Level: advanced
1717: Notes:
1718: The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence there is no need to call PetscFree() on it.
1720: .seealso: PetscSFGatherBegin()
1721: @*/
1722: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1723: {
1728: PetscSFCheckGraphSet(sf,1);
1730: if (!sf->degreeknown) {
1731: PetscInt i, nroots = sf->nroots, maxlocal;
1732: if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1733: maxlocal = sf->maxleaf-sf->minleaf+1;
1734: PetscMalloc1(nroots,&sf->degree);
1735: PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1736: for (i=0; i<nroots; i++) sf->degree[i] = 0;
1737: for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1738: PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1739: }
1740: *degree = NULL;
1741: return(0);
1742: }
1744: /*@C
1745: PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()
1747: Collective
1749: Input Arguments:
1750: . sf - star forest
1752: Output Arguments:
1753: . degree - degree of each root vertex
1755: Level: developer
1757: Notes:
1758: The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence there is no need to call PetscFree() on it.
1760: .seealso: PetscSFComputeDegreeBegin()
1761: @*/
1762: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
1763: {
1768: PetscSFCheckGraphSet(sf,1);
1770: if (!sf->degreeknown) {
1771: if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1772: PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1773: PetscFree(sf->degreetmp);
1774: sf->degreeknown = PETSC_TRUE;
1775: }
1776: *degree = sf->degree;
1777: return(0);
1778: }
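/* Example (not part of sf.c): a sketch of querying root degrees. The degree array is owned by the SF,
   so the caller does not free it. Error checking omitted. */
#include <petscsf.h>

PetscErrorCode ExamplePrintDegrees(PetscSF sf)
{
  const PetscInt *degree;
  PetscInt        i,nroots;

  PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
  PetscSFComputeDegreeBegin(sf,&degree);
  PetscSFComputeDegreeEnd(sf,&degree);
  for (i=0; i<nroots; i++) {PetscPrintf(PETSC_COMM_SELF,"root %D has degree %D\n",i,degree[i]);}
  return(0);
}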
1781: /*@C
1782: PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1783: Each multi-root is assigned the index of the corresponding original root.
1785: Collective
1787: Input Arguments:
1788: + sf - star forest
1789: - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1791: Output Arguments:
1792: + nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1793: - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1795: Level: developer
1797: Notes:
1798: The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1800: .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1801: @*/
1802: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1803: {
1804: PetscSF msf;
1805: PetscInt i, j, k, nroots, nmroots;
1806: PetscErrorCode ierr;
1810: PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
1814: PetscSFGetMultiSF(sf,&msf);
1815: PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);
1816: PetscMalloc1(nmroots, multiRootsOrigNumbering);
1817: for (i=0,j=0,k=0; i<nroots; i++) {
1818: if (!degree[i]) continue;
1819: for (j=0; j<degree[i]; j++,k++) {
1820: (*multiRootsOrigNumbering)[k] = i;
1821: }
1822: }
1823: if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Sanity check failed: number of multi-roots does not match");
1824: if (nMultiRoots) *nMultiRoots = nmroots;
1825: return(0);
1826: }
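/* Example (not part of sf.c): a sketch mapping multi-roots back to the original root numbering.
   Unlike the degree array, multiRootsOrig is allocated by the call and must be freed by the caller.
   Error checking omitted. */
#include <petscsf.h>

PetscErrorCode ExampleMultiRootNumbering(PetscSF sf)
{
  const PetscInt *degree;
  PetscInt       *multiRootsOrig,nmr;

  PetscSFComputeDegreeBegin(sf,&degree);
  PetscSFComputeDegreeEnd(sf,&degree);
  PetscSFComputeMultiRootOriginalNumbering(sf,degree,&nmr,&multiRootsOrig);
  /* multiRootsOrig[m] is the original root index of multi-root m, for 0 <= m < nmr */
  PetscFree(multiRootsOrig);
  return(0);
}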
1828: /*@C
1829: PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1831: Collective
1833: Input Arguments:
1834: + sf - star forest
1835: . unit - data type
1836: - leafdata - leaf data to gather to roots
1838: Output Argument:
1839: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1841: Level: intermediate
1843: .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1844: @*/
1845: PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1846: {
1848: PetscSF multi = NULL;
1852: PetscSFSetUp(sf);
1853: PetscSFGetMultiSF(sf,&multi);
1854: PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);
1855: return(0);
1856: }
1858: /*@C
1859: PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1861: Collective
1863: Input Arguments:
1864: + sf - star forest
1865: . unit - data type
1866: - leafdata - leaf data to gather to roots
1868: Output Argument:
1869: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1871: Level: intermediate
1873: .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1874: @*/
1875: PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1876: {
1878: PetscSF multi = NULL;
1882: PetscSFGetMultiSF(sf,&multi);
1883: PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);
1884: return(0);
1885: }
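/* Example (not part of sf.c): a sketch of a pointwise gather. The multi-root buffer receives one unit
   per incoming leaf, so its length is the sum of the root degrees. Assumes contiguous leaf storage;
   error checking omitted. */
#include <petscsf.h>

PetscErrorCode ExampleGather(PetscSF sf,const PetscInt *leafdata)
{
  const PetscInt *degree;
  PetscInt       *multirootdata,i,nroots,nmr = 0;

  PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
  PetscSFComputeDegreeBegin(sf,&degree);
  PetscSFComputeDegreeEnd(sf,&degree);
  for (i=0; i<nroots; i++) nmr += degree[i];
  PetscMalloc1(nmr,&multirootdata);
  PetscSFGatherBegin(sf,MPIU_INT,leafdata,multirootdata);
  PetscSFGatherEnd(sf,MPIU_INT,leafdata,multirootdata);
  PetscFree(multirootdata);
  return(0);
}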
1887: /*@C
1888: PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1890: Collective
1892: Input Arguments:
1893: + sf - star forest
1894: . unit - data type
1895: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1897: Output Argument:
1898: . leafdata - leaf data to be updated with personal data from each respective root
1900: Level: intermediate
1902: .seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
1903: @*/
1904: PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1905: {
1907: PetscSF multi = NULL;
1911: PetscSFSetUp(sf);
1912: PetscSFGetMultiSF(sf,&multi);
1913: PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE);
1914: return(0);
1915: }
1917: /*@C
1918: PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1920: Collective
1922: Input Arguments:
1923: + sf - star forest
1924: . unit - data type
1925: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1927: Output Argument:
1928: . leafdata - leaf data to be updated with personal data from each respective root
1930: Level: intermediate
1932: .seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd()
1933: @*/
1934: PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1935: {
1937: PetscSF multi = NULL;
1941: PetscSFGetMultiSF(sf,&multi);
1942: PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE);
1943: return(0);
1944: }
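/* Example (not part of sf.c): a sketch of the reverse of the gather above, sending each leaf its
   personal slot of the multi-root buffer; the buffer is laid out one unit per leaf, ordered by root
   and degree as in the gather sketch. Error checking omitted. */
#include <petscsf.h>

PetscErrorCode ExampleScatter(PetscSF sf,const PetscInt *multirootdata,PetscInt *leafdata)
{
  PetscSFScatterBegin(sf,MPIU_INT,multirootdata,leafdata);
  PetscSFScatterEnd(sf,MPIU_INT,multirootdata,leafdata);
  return(0);
}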
1946: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1947: {
1948: PetscInt i, n, nleaves;
1949: const PetscInt *ilocal = NULL;
1950: PetscHSetI seen;
1951: PetscErrorCode ierr;
1954: if (PetscDefined(USE_DEBUG)) {
1955: PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);
1956: PetscHSetICreate(&seen);
1957: for (i = 0; i < nleaves; i++) {
1958: const PetscInt leaf = ilocal ? ilocal[i] : i;
1959: PetscHSetIAdd(seen,leaf);
1960: }
1961: PetscHSetIGetSize(seen,&n);
1962: if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1963: PetscHSetIDestroy(&seen);
1964: }
1965: return(0);
1966: }
1968: /*@
1969: PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view
1971: Input Parameters:
1972: + sfA - The first PetscSF
1973: - sfB - The second PetscSF
1975: Output Parameters:
1976: . sfBA - The composite SF
1978: Level: developer
1980: Notes:
1981: Currently, the two SFs must be defined on congruent communicators and they must be true star
1982: forests, i.e. the same leaf is not connected with different roots.
1984: sfA's leaf space and sfB's root space may partially overlap. The composition builds
1985: a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
1986: nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1987: Bcast on sfA, then a Bcast on sfB, on connected nodes.
1989: .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
1990: @*/
1991: PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1992: {
1993: PetscErrorCode ierr;
1994: const PetscSFNode *remotePointsA,*remotePointsB;
1995: PetscSFNode *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
1996: const PetscInt *localPointsA,*localPointsB;
1997: PetscInt *localPointsBA;
1998: PetscInt i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
1999: PetscBool denseB;
2003: PetscSFCheckGraphSet(sfA,1);
2005: PetscSFCheckGraphSet(sfB,2);
2008: PetscSFCheckLeavesUnique_Private(sfA);
2009: PetscSFCheckLeavesUnique_Private(sfB);
2011: PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);
2012: PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);
2013: if (localPointsA) {
2014: PetscMalloc1(numRootsB,&reorderedRemotePointsA);
2015: for (i=0; i<numRootsB; i++) {
2016: reorderedRemotePointsA[i].rank = -1;
2017: reorderedRemotePointsA[i].index = -1;
2018: }
2019: for (i=0; i<numLeavesA; i++) {
2020: if (localPointsA[i] >= numRootsB) continue;
2021: reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
2022: }
2023: remotePointsA = reorderedRemotePointsA;
2024: }
2025: PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);
2026: PetscMalloc1(maxleaf-minleaf+1,&leafdataB);
2027: PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);
2028: PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);
2029: PetscFree(reorderedRemotePointsA);
2031: denseB = (PetscBool)!localPointsB;
2032: for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
2033: if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
2034: else numLeavesBA++;
2035: }
2036: if (denseB) {
2037: localPointsBA = NULL;
2038: remotePointsBA = leafdataB;
2039: } else {
2040: PetscMalloc1(numLeavesBA,&localPointsBA);
2041: PetscMalloc1(numLeavesBA,&remotePointsBA);
2042: for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
2043: const PetscInt l = localPointsB ? localPointsB[i] : i;
2045: if (leafdataB[l-minleaf].rank == -1) continue;
2046: remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
2047: localPointsBA[numLeavesBA] = l;
2048: numLeavesBA++;
2049: }
2050: PetscFree(leafdataB);
2051: }
2052: PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
2053: PetscSFSetFromOptions(*sfBA);
2054: PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
2055: return(0);
2056: }
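/* Example (not part of sf.c): a sketch of composing two SFs. A broadcast on sfBA gives the same result
   on connected leaves as a broadcast on sfA followed by a broadcast on sfB. Error checking omitted. */
#include <petscsf.h>

PetscErrorCode ExampleCompose(PetscSF sfA,PetscSF sfB)
{
  PetscSF sfBA;

  PetscSFCompose(sfA,sfB,&sfBA);
  /* ... use sfBA, e.g. with PetscSFBcastBegin()/PetscSFBcastEnd() ... */
  PetscSFDestroy(&sfBA);
  return(0);
}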
2058: /*@
2059: PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one
2061: Input Parameters:
2062: + sfA - The first PetscSF
2063: - sfB - The second PetscSF
2065: Output Parameters:
2066: . sfBA - The composite SF.
2068: Level: developer
2070: Notes:
2071: Currently, the two SFs must be defined on congruent communicators and they must be true star
2072: forests, i.e. the same leaf is not connected with different roots. Moreover, all roots of the
2073: second SF must have degree 1, i.e., no root has more than one leaf connected to it.
2075: sfA's leaf space and sfB's leaf space may partially overlap. The composition builds
2076: a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
2077: roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
2078: a Reduce on sfB, on connected roots.
2080: .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
2081: @*/
2082: PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
2083: {
2084: PetscErrorCode ierr;
2085: const PetscSFNode *remotePointsA,*remotePointsB;
2086: PetscSFNode *remotePointsBA;
2087: const PetscInt *localPointsA,*localPointsB;
2088: PetscSFNode *reorderedRemotePointsA = NULL;
2089: PetscInt i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
2090: MPI_Op op;
2091: #if defined(PETSC_USE_64BIT_INDICES)
2092: PetscBool iswin;
2093: #endif
2097: PetscSFCheckGraphSet(sfA,1);
2099: PetscSFCheckGraphSet(sfB,2);
2102: PetscSFCheckLeavesUnique_Private(sfA);
2103: PetscSFCheckLeavesUnique_Private(sfB);
2105: PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);
2106: PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);
2108: /* TODO: Check roots of sfB have degree of 1 */
2109: /* Once we implement it, we can replace the MPI_MAXLOC
2110: with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
2111: We use MPI_MAXLOC only to have a deterministic output from this routine if
2112: the root condition is not met.
2113: */
2114: op = MPI_MAXLOC;
2115: #if defined(PETSC_USE_64BIT_INDICES)
2116: /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2117: PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);
2118: if (iswin) op = MPI_REPLACE;
2119: #endif
2121: PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);
2122: PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);
2123: for (i=0; i<maxleaf - minleaf + 1; i++) {
2124: reorderedRemotePointsA[i].rank = -1;
2125: reorderedRemotePointsA[i].index = -1;
2126: }
2127: if (localPointsA) {
2128: for (i=0; i<numLeavesA; i++) {
2129: if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2130: reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2131: }
2132: } else {
2133: for (i=0; i<numLeavesA; i++) {
2134: if (i > maxleaf || i < minleaf) continue;
2135: reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2136: }
2137: }
2139: PetscMalloc1(numRootsB,&localPointsBA);
2140: PetscMalloc1(numRootsB,&remotePointsBA);
2141: for (i=0; i<numRootsB; i++) {
2142: remotePointsBA[i].rank = -1;
2143: remotePointsBA[i].index = -1;
2144: }
2146: PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
2147: PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
2148: PetscFree(reorderedRemotePointsA);
2149: for (i=0,numLeavesBA=0; i<numRootsB; i++) {
2150: if (remotePointsBA[i].rank == -1) continue;
2151: remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
2152: remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2153: localPointsBA[numLeavesBA] = i;
2154: numLeavesBA++;
2155: }
2156: PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
2157: PetscSFSetFromOptions(*sfBA);
2158: PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
2159: return(0);
2160: }
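/* Example (not part of sf.c): a sketch of composing sfA with the inverse of sfB, which requires every
   root of sfB to have degree 1. A broadcast on sfBA matches a broadcast on sfA followed by a reduce on
   sfB, restricted to connected roots. Error checking omitted. */
#include <petscsf.h>

PetscErrorCode ExampleComposeInverse(PetscSF sfA,PetscSF sfB)
{
  PetscSF sfBA;

  PetscSFComposeInverse(sfA,sfB,&sfBA);
  PetscSFDestroy(&sfBA);
  return(0);
}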
2162: /*
2163: PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
2165: Input Parameters:
2166: . sf - The global PetscSF
2168: Output Parameters:
2169: . out - The local PetscSF
2170: */
2171: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
2172: {
2173: MPI_Comm comm;
2174: PetscMPIInt myrank;
2175: const PetscInt *ilocal;
2176: const PetscSFNode *iremote;
2177: PetscInt i,j,nroots,nleaves,lnleaves,*lilocal;
2178: PetscSFNode *liremote;
2179: PetscSF lsf;
2180: PetscErrorCode ierr;
2184: if (sf->ops->CreateLocalSF) {
2185: (*sf->ops->CreateLocalSF)(sf,out);
2186: } else {
2187: /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2188: PetscObjectGetComm((PetscObject)sf,&comm);
2189: MPI_Comm_rank(comm,&myrank);
2191: /* Find out local edges and build a local SF */
2192: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
2193: for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2194: PetscMalloc1(lnleaves,&lilocal);
2195: PetscMalloc1(lnleaves,&liremote);
2197: for (i=j=0; i<nleaves; i++) {
2198: if (iremote[i].rank == (PetscInt)myrank) {
2199: lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2200: liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
2201: liremote[j].index = iremote[i].index;
2202: j++;
2203: }
2204: }
2205: PetscSFCreate(PETSC_COMM_SELF,&lsf);
2206: PetscSFSetFromOptions(lsf);
2207: PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);
2208: PetscSFSetUp(lsf);
2209: *out = lsf;
2210: }
2211: return(0);
2212: }
2214: /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2215: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2216: {
2217: PetscErrorCode ierr;
2218: PetscMemType rootmtype,leafmtype;
2222: PetscSFSetUp(sf);
2223: PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);
2224: PetscGetMemType(rootdata,&rootmtype);
2225: PetscGetMemType(leafdata,&leafmtype);
2226: if (sf->ops->BcastToZero) {
2227: (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);
2228: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2229: PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);
2230: return(0);
2231: }