Actual source code: sf.c
1: #include <petsc/private/sfimpl.h>
2: #include <petsc/private/hashseti.h>
3: #include <petscctable.h>
5: #if defined(PETSC_HAVE_CUDA)
6: #include <cuda_runtime.h>
7: #endif
9: #if defined(PETSC_HAVE_HIP)
10: #include <hip/hip_runtime.h>
11: #endif
13: #if defined(PETSC_USE_DEBUG)
14: # define PetscSFCheckGraphSet(sf,arg) do { \
15: if (PetscUnlikely(!(sf)->graphset)) \
16: SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
17: } while (0)
18: #else
19: # define PetscSFCheckGraphSet(sf,arg) do {} while (0)
20: #endif
22: const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
24: PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type)
25: {
28: *type = PETSC_MEMTYPE_HOST;
29: #if defined(PETSC_HAVE_CUDA)
30: if (PetscCUDAInitialized && data) {
31: cudaError_t cerr;
32: struct cudaPointerAttributes attr;
33: enum cudaMemoryType mtype;
34: cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */
35: cudaGetLastError(); /* Reset the last error */
36: #if (CUDART_VERSION < 10000)
37: mtype = attr.memoryType;
38: #else
39: mtype = attr.type;
40: #endif
41: if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
42: }
43: #endif
45: #if defined(PETSC_HAVE_HIP)
46: if (PetscHIPInitialized && data) {
47: hipError_t cerr;
48: struct hipPointerAttribute_t attr;
49: enum hipMemoryType mtype;
50: cerr = hipPointerGetAttributes(&attr,data);
51: hipGetLastError(); /* Reset the last error */
52: mtype = attr.memoryType;
53: if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
54: }
55: #endif
56: return(0);
57: }
59: /*@
60: PetscSFCreate - create a star forest communication context
62: Collective
64: Input Parameter:
65: . comm - communicator on which the star forest will operate
67: Output Parameter:
68: . sf - new star forest context
70: Options Database Keys:
71: + -sf_type basic - Use MPI persistent Isend/Irecv for communication (Default)
72: . -sf_type window - Use MPI-3 one-sided window for communication
73: - -sf_type neighbor - Use MPI-3 neighborhood collectives for communication
75: Level: intermediate
77: Notes:
78: When the communication graph is known to be one of the predefined patterns, such as MPI_Alltoall, MPI_Allgatherv or
79: MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
80: SFs are optimized and have better performance than general SFs.
82: .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
83: @*/
84: PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
85: {
87: PetscSF b;
91: PetscSFInitializePackage();
93: PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);
95: b->nroots = -1;
96: b->nleaves = -1;
97: b->minleaf = PETSC_MAX_INT;
98: b->maxleaf = PETSC_MIN_INT;
99: b->nranks = -1;
100: b->rankorder = PETSC_TRUE;
101: b->ingroup = MPI_GROUP_NULL;
102: b->outgroup = MPI_GROUP_NULL;
103: b->graphset = PETSC_FALSE;
104: #if defined(PETSC_HAVE_DEVICE)
105: b->use_gpu_aware_mpi = use_gpu_aware_mpi;
106: b->use_stream_aware_mpi = PETSC_FALSE;
107: b->unknown_input_stream= PETSC_FALSE;
108: #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
109: b->backend = PETSCSF_BACKEND_KOKKOS;
110: #elif defined(PETSC_HAVE_CUDA)
111: b->backend = PETSCSF_BACKEND_CUDA;
112: #elif defined(PETSC_HAVE_HIP)
113: b->backend = PETSCSF_BACKEND_HIP;
114: #endif
116: #if defined(PETSC_HAVE_NVSHMEM)
117: b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */
118: b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
119: PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL);
120: PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL);
121: #endif
122: #endif
123: b->vscat.from_n = -1;
124: b->vscat.to_n = -1;
125: b->vscat.unit = MPIU_SCALAR;
126: *sf = b;
127: return(0);
128: }
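/*
   Illustrative usage sketch (not part of the original sf.c): the minimal life cycle of a PetscSF.
   The communicator and the calling sequence around the SF are assumptions for the example;
   error checking is omitted for brevity, matching the simplified listing above.

     PetscSF sf;

     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     PetscSFSetFromOptions(sf);          // honor -sf_type, -sf_backend, ...
     // ... describe the graph with PetscSFSetGraph() or PetscSFSetGraphWithPattern() ...
     PetscSFSetUp(sf);
     // ... communicate with PetscSFBcastBegin()/End(), PetscSFReduceBegin()/End(), ...
     PetscSFDestroy(&sf);
*/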
130: /*@
131: PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
133: Collective
135: Input Parameter:
136: . sf - star forest
138: Level: advanced
140: .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
141: @*/
142: PetscErrorCode PetscSFReset(PetscSF sf)
143: {
148: if (sf->ops->Reset) {(*sf->ops->Reset)(sf);}
149: sf->nroots = -1;
150: sf->nleaves = -1;
151: sf->minleaf = PETSC_MAX_INT;
152: sf->maxleaf = PETSC_MIN_INT;
153: sf->mine = NULL;
154: sf->remote = NULL;
155: sf->graphset = PETSC_FALSE;
156: PetscFree(sf->mine_alloc);
157: PetscFree(sf->remote_alloc);
158: sf->nranks = -1;
159: PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);
160: sf->degreeknown = PETSC_FALSE;
161: PetscFree(sf->degree);
162: if (sf->ingroup != MPI_GROUP_NULL) {MPI_Group_free(&sf->ingroup);}
163: if (sf->outgroup != MPI_GROUP_NULL) {MPI_Group_free(&sf->outgroup);}
164: if (sf->multi) sf->multi->multi = NULL;
165: PetscSFDestroy(&sf->multi);
166: PetscLayoutDestroy(&sf->map);
168: #if defined(PETSC_HAVE_DEVICE)
169: for (PetscInt i=0; i<2; i++) {PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);}
170: #endif
172: sf->setupcalled = PETSC_FALSE;
173: return(0);
174: }
176: /*@C
177: PetscSFSetType - Set the PetscSF communication implementation
179: Collective on PetscSF
181: Input Parameters:
182: + sf - the PetscSF context
183: - type - a known method
185: Options Database Key:
186: . -sf_type <type> - Sets the method; use -help for a list
187: of available methods (for instance, window, basic, neighbor)
189: Notes:
190: See "include/petscsf.h" for available methods (for instance)
191: + PETSCSFWINDOW - MPI-2/3 one-sided
192: - PETSCSFBASIC - basic implementation using MPI-1 two-sided
194: Level: intermediate
196: .seealso: PetscSFType, PetscSFCreate()
197: @*/
198: PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
199: {
200: PetscErrorCode ierr,(*r)(PetscSF);
201: PetscBool match;
207: PetscObjectTypeCompare((PetscObject)sf,type,&match);
208: if (match) return(0);
210: PetscFunctionListFind(PetscSFList,type,&r);
211: if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
212: /* Destroy the previous PetscSF implementation context */
213: if (sf->ops->Destroy) {(*(sf)->ops->Destroy)(sf);}
214: PetscMemzero(sf->ops,sizeof(*sf->ops));
215: PetscObjectChangeTypeName((PetscObject)sf,type);
216: (*r)(sf);
217: return(0);
218: }
220: /*@C
221: PetscSFGetType - Get the PetscSF communication implementation
223: Not Collective
225: Input Parameter:
226: . sf - the PetscSF context
228: Output Parameter:
229: . type - the PetscSF type name
231: Level: intermediate
233: .seealso: PetscSFSetType(), PetscSFCreate()
234: @*/
235: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
236: {
240: *type = ((PetscObject)sf)->type_name;
241: return(0);
242: }
244: /*@C
245: PetscSFDestroy - destroy star forest
247: Collective
249: Input Parameter:
250: . sf - address of star forest
252: Level: intermediate
254: .seealso: PetscSFCreate(), PetscSFReset()
255: @*/
256: PetscErrorCode PetscSFDestroy(PetscSF *sf)
257: {
261: if (!*sf) return(0);
263: if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; return(0);}
264: PetscSFReset(*sf);
265: if ((*sf)->ops->Destroy) {(*(*sf)->ops->Destroy)(*sf);}
266: PetscSFDestroy(&(*sf)->vscat.lsf);
267: if ((*sf)->vscat.bs > 1) {MPI_Type_free(&(*sf)->vscat.unit);}
268: PetscHeaderDestroy(sf);
269: return(0);
270: }
272: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
273: {
274: PetscInt i, nleaves;
275: PetscMPIInt size;
276: const PetscInt *ilocal;
277: const PetscSFNode *iremote;
278: PetscErrorCode ierr;
281: if (!sf->graphset || !PetscDefined(USE_DEBUG)) return(0);
282: PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);
283: MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
284: for (i = 0; i < nleaves; i++) {
285: const PetscInt rank = iremote[i].rank;
286: const PetscInt remote = iremote[i].index;
287: const PetscInt leaf = ilocal ? ilocal[i] : i;
288: if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
289: if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
290: if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
291: }
292: return(0);
293: }
295: /*@
296: PetscSFSetUp - set up communication structures
298: Collective
300: Input Parameter:
301: . sf - star forest communication object
303: Level: beginner
305: .seealso: PetscSFSetFromOptions(), PetscSFSetType()
306: @*/
307: PetscErrorCode PetscSFSetUp(PetscSF sf)
308: {
313: PetscSFCheckGraphSet(sf,1);
314: if (sf->setupcalled) return(0);
315: PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);
316: PetscSFCheckGraphValid_Private(sf);
317: if (!((PetscObject)sf)->type_name) {PetscSFSetType(sf,PETSCSFBASIC);} /* Zero all sf->ops */
318: if (sf->ops->SetUp) {(*sf->ops->SetUp)(sf);}
319: #if defined(PETSC_HAVE_CUDA)
320: if (sf->backend == PETSCSF_BACKEND_CUDA) {
321: sf->ops->Malloc = PetscSFMalloc_CUDA;
322: sf->ops->Free = PetscSFFree_CUDA;
323: }
324: #endif
325: #if defined(PETSC_HAVE_HIP)
326: if (sf->backend == PETSCSF_BACKEND_HIP) {
327: sf->ops->Malloc = PetscSFMalloc_HIP;
328: sf->ops->Free = PetscSFFree_HIP;
329: }
330: #endif
333: #if defined(PETSC_HAVE_KOKKOS)
334: if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
335: sf->ops->Malloc = PetscSFMalloc_Kokkos;
336: sf->ops->Free = PetscSFFree_Kokkos;
337: }
338: #endif
339: PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);
340: sf->setupcalled = PETSC_TRUE;
341: return(0);
342: }
344: /*@
345: PetscSFSetFromOptions - set PetscSF options using the options database
347: Logically Collective
349: Input Parameter:
350: . sf - star forest
352: Options Database Keys:
353: + -sf_type - implementation type, see PetscSFSetType()
354: . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
355: . -sf_unknown_input_stream - Assume the SF root/leafdata is computed on arbitrary streams unknown to SF, so that SF
356: performs the needed stream synchronization before accessing the data (default: false).
358: . -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
359: If true, this option only works with -use_gpu_aware_mpi 1.
361: - -sf_backend cuda | hip | kokkos - Select the device backend SF uses. Currently SF has these backends: cuda, hip and kokkos.
362: On CUDA (HIP) devices, one can choose cuda (hip) or kokkos, with the default being kokkos. On other devices,
363: kokkos is the only available backend.
365: Level: intermediate
366: @*/
367: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
368: {
369: PetscSFType deft;
370: char type[256];
372: PetscBool flg;
376: PetscObjectOptionsBegin((PetscObject)sf);
377: deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
378: PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);
379: PetscSFSetType(sf,flg ? type : deft);
380: PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);
381: #if defined(PETSC_HAVE_DEVICE)
382: {
383: char backendstr[32] = {0};
384: PetscBool isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
385: /* Change the defaults set in PetscSFCreate() with command line options */
386: PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL);
387: PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);
388: PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);
389: PetscStrcasecmp("cuda",backendstr,&isCuda);
390: PetscStrcasecmp("kokkos",backendstr,&isKokkos);
391: PetscStrcasecmp("hip",backendstr,&isHip);
392: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
393: if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
394: else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
395: else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
396: else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
397: #elif defined(PETSC_HAVE_KOKKOS)
398: if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
399: #endif
400: }
401: #endif
402: if (sf->ops->SetFromOptions) {(*sf->ops->SetFromOptions)(PetscOptionsObject,sf);}
403: PetscOptionsEnd();
404: return(0);
405: }
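/*
   Illustrative sketch (not part of the original sf.c): PetscSFSetFromOptions() is typically called
   right after PetscSFCreate(), so that runtime options such as

     ./app -sf_type neighbor -sf_backend kokkos

   take effect; the executable name here is a placeholder.
*/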
407: /*@
408: PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
410: Logically Collective
412: Input Parameters:
413: + sf - star forest
414: - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
416: Level: advanced
418: .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
419: @*/
420: PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
421: {
425: if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
426: sf->rankorder = flg;
427: return(0);
428: }
430: /*@C
431: PetscSFSetGraph - Set a parallel star forest
433: Collective
435: Input Parameters:
436: + sf - star forest
437: . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
438: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
439: . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
440: during setup in debug mode)
441: . localmode - copy mode for ilocal
442: . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
443: during setup in debug mode)
444: - remotemode - copy mode for iremote
446: Level: intermediate
448: Notes:
449: In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode
451: Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
452: encode contiguous storage. In that case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
453: needed.
455: Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
456: indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).
458: .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
459: @*/
460: PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
461: {
468: if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
469: if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);
471: if (sf->nroots >= 0) { /* Reset only if graph already set */
472: PetscSFReset(sf);
473: }
475: PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);
477: sf->nroots = nroots;
478: sf->nleaves = nleaves;
480: if (nleaves && ilocal) {
481: PetscInt i;
482: PetscInt minleaf = PETSC_MAX_INT;
483: PetscInt maxleaf = PETSC_MIN_INT;
484: int contiguous = 1;
485: for (i=0; i<nleaves; i++) {
486: minleaf = PetscMin(minleaf,ilocal[i]);
487: maxleaf = PetscMax(maxleaf,ilocal[i]);
488: contiguous &= (ilocal[i] == i);
489: }
490: sf->minleaf = minleaf;
491: sf->maxleaf = maxleaf;
492: if (contiguous) {
493: if (localmode == PETSC_OWN_POINTER) {
494: PetscFree(ilocal);
495: }
496: ilocal = NULL;
497: }
498: } else {
499: sf->minleaf = 0;
500: sf->maxleaf = nleaves - 1;
501: }
503: if (ilocal) {
504: switch (localmode) {
505: case PETSC_COPY_VALUES:
506: PetscMalloc1(nleaves,&sf->mine_alloc);
507: PetscArraycpy(sf->mine_alloc,ilocal,nleaves);
508: sf->mine = sf->mine_alloc;
509: break;
510: case PETSC_OWN_POINTER:
511: sf->mine_alloc = (PetscInt*)ilocal;
512: sf->mine = sf->mine_alloc;
513: break;
514: case PETSC_USE_POINTER:
515: sf->mine_alloc = NULL;
516: sf->mine = (PetscInt*)ilocal;
517: break;
518: default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
519: }
520: }
522: switch (remotemode) {
523: case PETSC_COPY_VALUES:
524: PetscMalloc1(nleaves,&sf->remote_alloc);
525: PetscArraycpy(sf->remote_alloc,iremote,nleaves);
526: sf->remote = sf->remote_alloc;
527: break;
528: case PETSC_OWN_POINTER:
529: sf->remote_alloc = (PetscSFNode*)iremote;
530: sf->remote = sf->remote_alloc;
531: break;
532: case PETSC_USE_POINTER:
533: sf->remote_alloc = NULL;
534: sf->remote = (PetscSFNode*)iremote;
535: break;
536: default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
537: }
539: PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);
540: sf->graphset = PETSC_TRUE;
541: return(0);
542: }
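/*
   Illustrative sketch (not part of the original sf.c): every process attaches one leaf to root 0
   owned by process 0 (a simple star). The communicator and variable names are assumptions for
   the example; error checking is omitted for brevity.

     PetscMPIInt rank;
     PetscSFNode remote;
     PetscSF     sf;

     MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     remote.rank  = 0;                      // the root lives on process 0
     remote.index = 0;                      // ... at root location 0
     PetscSFSetGraph(sf,rank ? 0 : 1,1,NULL,PETSC_COPY_VALUES,&remote,PETSC_COPY_VALUES);
     PetscSFSetUp(sf);
*/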
544: /*@
545: PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern
547: Collective
549: Input Parameters:
550: + sf - The PetscSF
551: . map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
552: - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL
554: Notes:
555: It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
556: n and N are local and global sizes of x respectively.
558: With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph such that a Bcast on it copies x to
559: sequential vectors y on all ranks.
561: With PETSCSF_PATTERN_GATHER, the routine creates a graph such that a Bcast on it copies x to a
562: sequential vector y on rank 0.
564: In the above cases, entries of x are roots and entries of y are leaves.
566: With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is the size of sf's communicator. The routine
567: creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
568: of rank j, for 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. This does not
569: mean one cannot send multiple items; one just needs to create a new MPI datatype for the multiple data
570: items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.
572: In this case, roots and leaves are symmetric.
574: Level: intermediate
575: @*/
576: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
577: {
578: MPI_Comm comm;
579: PetscInt n,N,res[2];
580: PetscMPIInt rank,size;
581: PetscSFType type;
585: PetscObjectGetComm((PetscObject)sf, &comm);
586: if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
587: MPI_Comm_rank(comm,&rank);
588: MPI_Comm_size(comm,&size);
590: if (pattern == PETSCSF_PATTERN_ALLTOALL) {
591: type = PETSCSFALLTOALL;
592: PetscLayoutCreate(comm,&sf->map);
593: PetscLayoutSetLocalSize(sf->map,size);
594: PetscLayoutSetSize(sf->map,((PetscInt)size)*size);
595: PetscLayoutSetUp(sf->map);
596: } else {
597: PetscLayoutGetLocalSize(map,&n);
598: PetscLayoutGetSize(map,&N);
599: res[0] = n;
600: res[1] = -n;
601: /* Check if n is the same over all ranks so that we can optimize for that case */
602: MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);
603: if (res[0] == -res[1]) { /* same n */
604: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
605: } else {
606: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
607: }
608: PetscLayoutReference(map,&sf->map);
609: }
610: PetscSFSetType(sf,type);
612: sf->pattern = pattern;
613: sf->mine = NULL; /* Contiguous */
615: /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
616: Also set other easy stuff.
617: */
618: if (pattern == PETSCSF_PATTERN_ALLGATHER) {
619: sf->nleaves = N;
620: sf->nroots = n;
621: sf->nranks = size;
622: sf->minleaf = 0;
623: sf->maxleaf = N - 1;
624: } else if (pattern == PETSCSF_PATTERN_GATHER) {
625: sf->nleaves = rank ? 0 : N;
626: sf->nroots = n;
627: sf->nranks = rank ? 0 : size;
628: sf->minleaf = 0;
629: sf->maxleaf = rank ? -1 : N - 1;
630: } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
631: sf->nleaves = size;
632: sf->nroots = size;
633: sf->nranks = size;
634: sf->minleaf = 0;
635: sf->maxleaf = size - 1;
636: }
637: sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
638: sf->graphset = PETSC_TRUE;
639: return(0);
640: }
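/*
   Illustrative sketch (not part of the original sf.c): build an ALLGATHER-patterned SF from a
   layout with n local roots per process; n and the communicator are assumptions for the example,
   and error checking is omitted for brevity.

     PetscLayout map;
     PetscSF     sf;
     PetscInt    n = 5;                                 // local number of roots (example value)

     PetscLayoutCreate(PETSC_COMM_WORLD,&map);
     PetscLayoutSetLocalSize(map,n);
     PetscLayoutSetUp(map);
     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     PetscSFSetGraphWithPattern(sf,map,PETSCSF_PATTERN_ALLGATHER);
     // a Bcast on sf now copies all root values to every process's leaf buffer
     PetscLayoutDestroy(&map);                          // sf keeps its own reference to the layout
     PetscSFDestroy(&sf);
*/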
642: /*@
643: PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
645: Collective
647: Input Parameter:
648: . sf - star forest to invert
650: Output Parameter:
651: . isf - inverse of sf
653: Level: advanced
655: Notes:
656: All roots must have degree 1.
658: The local space may be a permutation, but cannot be sparse.
660: .seealso: PetscSFSetGraph()
661: @*/
662: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
663: {
665: PetscMPIInt rank;
666: PetscInt i,nroots,nleaves,maxlocal,count,*newilocal;
667: const PetscInt *ilocal;
668: PetscSFNode *roots,*leaves;
672: PetscSFCheckGraphSet(sf,1);
675: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);
676: maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
678: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
679: PetscMalloc2(nroots,&roots,maxlocal,&leaves);
680: for (i=0; i<maxlocal; i++) {
681: leaves[i].rank = rank;
682: leaves[i].index = i;
683: }
684: for (i=0; i <nroots; i++) {
685: roots[i].rank = -1;
686: roots[i].index = -1;
687: }
688: PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);
689: PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);
691: /* Check whether our leaves are sparse */
692: for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
693: if (count == nroots) newilocal = NULL;
694: else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
695: PetscMalloc1(count,&newilocal);
696: for (i=0,count=0; i<nroots; i++) {
697: if (roots[i].rank >= 0) {
698: newilocal[count] = i;
699: roots[count].rank = roots[i].rank;
700: roots[count].index = roots[i].index;
701: count++;
702: }
703: }
704: }
706: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);
707: PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);
708: PetscFree2(roots,leaves);
709: return(0);
710: }
712: /*@
713: PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
715: Collective
717: Input Parameters:
718: + sf - communication object to duplicate
719: - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
721: Output Parameter:
722: . newsf - new communication object
724: Level: beginner
726: .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
727: @*/
728: PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
729: {
730: PetscSFType type;
732: MPI_Datatype dtype=MPIU_SCALAR;
738: PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);
739: PetscSFGetType(sf,&type);
740: if (type) {PetscSFSetType(*newsf,type);}
741: if (opt == PETSCSF_DUPLICATE_GRAPH) {
742: PetscSFCheckGraphSet(sf,1);
743: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
744: PetscInt nroots,nleaves;
745: const PetscInt *ilocal;
746: const PetscSFNode *iremote;
747: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
748: PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);
749: } else {
750: PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);
751: }
752: }
753: /* Since oldtype is committed, so is newtype, according to MPI */
754: if (sf->vscat.bs > 1) {MPI_Type_dup(sf->vscat.unit,&dtype);}
755: (*newsf)->vscat.bs = sf->vscat.bs;
756: (*newsf)->vscat.unit = dtype;
757: (*newsf)->vscat.to_n = sf->vscat.to_n;
758: (*newsf)->vscat.from_n = sf->vscat.from_n;
759: /* Do not copy lsf. Build it on demand since it is rarely used */
761: #if defined(PETSC_HAVE_DEVICE)
762: (*newsf)->backend = sf->backend;
763: (*newsf)->unknown_input_stream= sf->unknown_input_stream;
764: (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi;
765: (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
766: #endif
767: if (sf->ops->Duplicate) {(*sf->ops->Duplicate)(sf,opt,*newsf);}
768: /* Don't do PetscSFSetUp() since the new sf's graph might not have been set. */
769: return(0);
770: }
772: /*@C
773: PetscSFGetGraph - Get the graph specifying a parallel star forest
775: Not Collective
777: Input Parameter:
778: . sf - star forest
780: Output Parameters:
781: + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
782: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
783: . ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
784: - iremote - remote locations of root vertices for each leaf on the current process
786: Notes:
787: We are not currently requiring that the graph be set, thus nroots is returned as -1 if it has not been set yet
789: Fortran Notes:
790: The returned iremote array is a copy and must be deallocated after use. Consequently, if you
791: want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.
793: To check for a NULL ilocal use
794: $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
796: Level: intermediate
798: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
799: @*/
800: PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
801: {
806: if (sf->ops->GetGraph) {
807: (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);
808: } else {
809: if (nroots) *nroots = sf->nroots;
810: if (nleaves) *nleaves = sf->nleaves;
811: if (ilocal) *ilocal = sf->mine;
812: if (iremote) *iremote = sf->remote;
813: }
814: return(0);
815: }
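/*
   Illustrative sketch (not part of the original sf.c): querying the graph of an sf whose graph
   has already been set; the variable names are assumptions for the example.

     PetscInt          nroots,nleaves;
     const PetscInt    *ilocal;
     const PetscSFNode *iremote;

     PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
     // ilocal == NULL means the leaves are stored contiguously at locations [0,nleaves)
*/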
817: /*@
818: PetscSFGetLeafRange - Get the active leaf ranges
820: Not Collective
822: Input Parameter:
823: . sf - star forest
825: Output Parameters:
826: + minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
827: - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
829: Level: developer
831: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
832: @*/
833: PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
834: {
837: PetscSFCheckGraphSet(sf,1);
838: if (minleaf) *minleaf = sf->minleaf;
839: if (maxleaf) *maxleaf = sf->maxleaf;
840: return(0);
841: }
843: /*@C
844: PetscSFViewFromOptions - View from Options
846: Collective on PetscSF
848: Input Parameters:
849: + A - the star forest
850: . obj - Optional object
851: - name - command line option
853: Level: intermediate
854: .seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
855: @*/
856: PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
857: {
862: PetscObjectViewFromOptions((PetscObject)A,obj,name);
863: return(0);
864: }
866: /*@C
867: PetscSFView - view a star forest
869: Collective
871: Input Parameters:
872: + sf - star forest
873: - viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
875: Level: beginner
877: .seealso: PetscSFCreate(), PetscSFSetGraph()
878: @*/
879: PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
880: {
881: PetscErrorCode ierr;
882: PetscBool iascii;
883: PetscViewerFormat format;
887: if (!viewer) {PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);}
890: if (sf->graphset) {PetscSFSetUp(sf);}
891: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
892: if (iascii) {
893: PetscMPIInt rank;
894: PetscInt ii,i,j;
896: PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);
897: PetscViewerASCIIPushTab(viewer);
898: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
899: if (!sf->graphset) {
900: PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");
901: PetscViewerASCIIPopTab(viewer);
902: return(0);
903: }
904: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
905: PetscViewerASCIIPushSynchronized(viewer);
906: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);
907: for (i=0; i<sf->nleaves; i++) {
908: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);
909: }
910: PetscViewerFlush(viewer);
911: PetscViewerGetFormat(viewer,&format);
912: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
913: PetscMPIInt *tmpranks,*perm;
914: PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);
915: PetscArraycpy(tmpranks,sf->ranks,sf->nranks);
916: for (i=0; i<sf->nranks; i++) perm[i] = i;
917: PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);
918: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);
919: for (ii=0; ii<sf->nranks; ii++) {
920: i = perm[ii];
921: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);
922: for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
923: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);
924: }
925: }
926: PetscFree2(tmpranks,perm);
927: }
928: PetscViewerFlush(viewer);
929: PetscViewerASCIIPopSynchronized(viewer);
930: }
931: PetscViewerASCIIPopTab(viewer);
932: }
933: if (sf->ops->View) {(*sf->ops->View)(sf,viewer);}
934: return(0);
935: }
937: /*@C
938: PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
940: Not Collective
942: Input Parameter:
943: . sf - star forest
945: Output Parameters:
946: + nranks - number of ranks referenced by local part
947: . ranks - array of ranks
948: . roffset - offset in rmine/rremote for each rank (length nranks+1)
949: . rmine - concatenated array holding local indices referencing each remote rank
950: - rremote - concatenated array holding remote indices referenced for each remote rank
952: Level: developer
954: .seealso: PetscSFGetLeafRanks()
955: @*/
956: PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
957: {
962: if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
963: if (sf->ops->GetRootRanks) {
964: (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);
965: } else {
966: /* The generic implementation */
967: if (nranks) *nranks = sf->nranks;
968: if (ranks) *ranks = sf->ranks;
969: if (roffset) *roffset = sf->roffset;
970: if (rmine) *rmine = sf->rmine;
971: if (rremote) *rremote = sf->rremote;
972: }
973: return(0);
974: }
976: /*@C
977: PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
979: Not Collective
981: Input Parameter:
982: . sf - star forest
984: Output Parameters:
985: + niranks - number of leaf ranks referencing roots on this process
986: . iranks - array of ranks
987: . ioffset - offset in irootloc for each rank (length niranks+1)
988: - irootloc - concatenated array holding local indices of roots referenced by each leaf rank
990: Level: developer
992: .seealso: PetscSFGetRootRanks()
993: @*/
994: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
995: {
1000: if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
1001: if (sf->ops->GetLeafRanks) {
1002: (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);
1003: } else {
1004: PetscSFType type;
1005: PetscSFGetType(sf,&type);
1006: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
1007: }
1008: return(0);
1009: }
1011: static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
1012: PetscInt i;
1013: for (i=0; i<n; i++) {
1014: if (needle == list[i]) return PETSC_TRUE;
1015: }
1016: return PETSC_FALSE;
1017: }
1019: /*@C
1020: PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
1022: Collective
1024: Input Parameters:
1025: + sf - PetscSF to set up; PetscSFSetGraph() must have been called
1026: - dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)
1028: Level: developer
1030: .seealso: PetscSFGetRootRanks()
1031: @*/
1032: PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
1033: {
1034: PetscErrorCode ierr;
1035: PetscTable table;
1036: PetscTablePosition pos;
1037: PetscMPIInt size,groupsize,*groupranks;
1038: PetscInt *rcount,*ranks;
1039: PetscInt i, irank = -1,orank = -1;
1043: PetscSFCheckGraphSet(sf,1);
1044: MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
1045: PetscTableCreate(10,size,&table);
1046: for (i=0; i<sf->nleaves; i++) {
1047: /* Log 1-based rank */
1048: PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);
1049: }
1050: PetscTableGetCount(table,&sf->nranks);
1051: PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);
1052: PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);
1053: PetscTableGetHeadPosition(table,&pos);
1054: for (i=0; i<sf->nranks; i++) {
1055: PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);
1056: ranks[i]--; /* Convert back to 0-based */
1057: }
1058: PetscTableDestroy(&table);
1060: /* We expect that dgroup is reliably "small" while nranks could be large */
1061: {
1062: MPI_Group group = MPI_GROUP_NULL;
1063: PetscMPIInt *dgroupranks;
1064: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1065: MPI_Group_size(dgroup,&groupsize);
1066: PetscMalloc1(groupsize,&dgroupranks);
1067: PetscMalloc1(groupsize,&groupranks);
1068: for (i=0; i<groupsize; i++) dgroupranks[i] = i;
1069: if (groupsize) {MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);}
1070: MPI_Group_free(&group);
1071: PetscFree(dgroupranks);
1072: }
1074: /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1075: for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
1076: for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
1077: if (InList(ranks[i],groupsize,groupranks)) break;
1078: }
1079: for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1080: if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
1081: }
1082: if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1083: PetscInt tmprank,tmpcount;
1085: tmprank = ranks[i];
1086: tmpcount = rcount[i];
1087: ranks[i] = ranks[sf->ndranks];
1088: rcount[i] = rcount[sf->ndranks];
1089: ranks[sf->ndranks] = tmprank;
1090: rcount[sf->ndranks] = tmpcount;
1091: sf->ndranks++;
1092: }
1093: }
1094: PetscFree(groupranks);
1095: PetscSortIntWithArray(sf->ndranks,ranks,rcount);
1096: PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);
1097: sf->roffset[0] = 0;
1098: for (i=0; i<sf->nranks; i++) {
1099: PetscMPIIntCast(ranks[i],sf->ranks+i);
1100: sf->roffset[i+1] = sf->roffset[i] + rcount[i];
1101: rcount[i] = 0;
1102: }
1103: for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
1104: /* short circuit */
1105: if (orank != sf->remote[i].rank) {
1106: /* Search for index of iremote[i].rank in sf->ranks */
1107: PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);
1108: if (irank < 0) {
1109: PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);
1110: if (irank >= 0) irank += sf->ndranks;
1111: }
1112: orank = sf->remote[i].rank;
1113: }
1114: if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
1115: sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i;
1116: sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1117: rcount[irank]++;
1118: }
1119: PetscFree2(rcount,ranks);
1120: return(0);
1121: }
1123: /*@C
1124: PetscSFGetGroups - gets incoming and outgoing process groups
1126: Collective
1128: Input Parameter:
1129: . sf - star forest
1131: Output Parameters:
1132: + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1133: - outgoing - group of destination processes for outgoing edges (roots that I reference)
1135: Level: developer
1137: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
1138: @*/
1139: PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
1140: {
1142: MPI_Group group = MPI_GROUP_NULL;
1145: if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
1146: if (sf->ingroup == MPI_GROUP_NULL) {
1147: PetscInt i;
1148: const PetscInt *indegree;
1149: PetscMPIInt rank,*outranks,*inranks;
1150: PetscSFNode *remote;
1151: PetscSF bgcount;
1153: /* Compute the number of incoming ranks */
1154: PetscMalloc1(sf->nranks,&remote);
1155: for (i=0; i<sf->nranks; i++) {
1156: remote[i].rank = sf->ranks[i];
1157: remote[i].index = 0;
1158: }
1159: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);
1160: PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1161: PetscSFComputeDegreeBegin(bgcount,&indegree);
1162: PetscSFComputeDegreeEnd(bgcount,&indegree);
1163: /* Enumerate the incoming ranks */
1164: PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);
1165: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1166: for (i=0; i<sf->nranks; i++) outranks[i] = rank;
1167: PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);
1168: PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);
1169: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1170: MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);
1171: MPI_Group_free(&group);
1172: PetscFree2(inranks,outranks);
1173: PetscSFDestroy(&bgcount);
1174: }
1175: *incoming = sf->ingroup;
1177: if (sf->outgroup == MPI_GROUP_NULL) {
1178: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1179: MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);
1180: MPI_Group_free(&group);
1181: }
1182: *outgoing = sf->outgroup;
1183: return(0);
1184: }
1186: /*@
1187: PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
1189: Collective
1191: Input Parameter:
1192: . sf - star forest that may contain roots with 0 or with more than 1 vertex
1194: Output Parameter:
1195: . multi - star forest with split roots, such that each root has degree exactly 1
1197: Level: developer
1199: Notes:
1201: In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1202: directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1203: edge, it is a candidate for future optimization that might involve its removal.
1205: .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1206: @*/
1207: PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
1208: {
1214: if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1215: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1216: *multi = sf->multi;
1217: sf->multi->multi = sf->multi;
1218: return(0);
1219: }
1220: if (!sf->multi) {
1221: const PetscInt *indegree;
1222: PetscInt i,*inoffset,*outones,*outoffset,maxlocal;
1223: PetscSFNode *remote;
1224: maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
1225: PetscSFComputeDegreeBegin(sf,&indegree);
1226: PetscSFComputeDegreeEnd(sf,&indegree);
1227: PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);
1228: inoffset[0] = 0;
1229: for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
1230: for (i=0; i<maxlocal; i++) outones[i] = 1;
1231: PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1232: PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1233: for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1234: if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
1235: for (i=0; i<sf->nroots; i++) {
1236: if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
1237: }
1238: }
1239: PetscMalloc1(sf->nleaves,&remote);
1240: for (i=0; i<sf->nleaves; i++) {
1241: remote[i].rank = sf->remote[i].rank;
1242: remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1243: }
1244: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1245: sf->multi->multi = sf->multi;
1246: PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1247: if (sf->rankorder) { /* Sort the ranks */
1248: PetscMPIInt rank;
1249: PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
1250: PetscSFNode *newremote;
1251: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1252: for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
1253: PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);
1254: for (i=0; i<maxlocal; i++) outranks[i] = rank;
1255: PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);
1256: PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);
1257: /* Sort the incoming ranks at each vertex, build the inverse map */
1258: for (i=0; i<sf->nroots; i++) {
1259: PetscInt j;
1260: for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
1261: PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);
1262: for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1263: }
1264: PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);
1265: PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);
1266: PetscMalloc1(sf->nleaves,&newremote);
1267: for (i=0; i<sf->nleaves; i++) {
1268: newremote[i].rank = sf->remote[i].rank;
1269: newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1270: }
1271: PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);
1272: PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);
1273: }
1274: PetscFree3(inoffset,outones,outoffset);
1275: }
1276: *multi = sf->multi;
1277: return(0);
1278: }
1280: /*@C
1281: PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices
1283: Collective
1285: Input Parameters:
1286: + sf - original star forest
1287: . nselected - number of selected roots on this process
1288: - selected - indices of the selected roots on this process
1290: Output Parameter:
1291: . esf - new star forest
1293: Level: advanced
1295: Note:
1296: To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1297: be done by calling PetscSFGetGraph().
1299: .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1300: @*/
1301: PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1302: {
1303: PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1304: const PetscInt *ilocal;
1305: signed char *rootdata,*leafdata,*leafmem;
1306: const PetscSFNode *iremote;
1307: PetscSFNode *new_iremote;
1308: MPI_Comm comm;
1309: PetscErrorCode ierr;
1313: PetscSFCheckGraphSet(sf,1);
1317: PetscSFSetUp(sf);
1318: PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);
1319: PetscObjectGetComm((PetscObject)sf,&comm);
1320: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
1322: if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1323: PetscBool dups;
1324: PetscCheckDupsInt(nselected,selected,&dups); /* compute dups before it is tested below */
1325: if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1326: for (i=0; i<nselected; i++)
1327: if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root index %D is out of [0,%D)",selected[i],nroots);
1328: }
1330: if (sf->ops->CreateEmbeddedRootSF) {
1331: (*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf);
1332: } else {
1333: /* A generic version of creating embedded sf */
1334: PetscSFGetLeafRange(sf,&minleaf,&maxleaf);
1335: maxlocal = maxleaf - minleaf + 1;
1336: PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);
1337: leafdata = leafmem - minleaf;
1338: /* Tag selected roots and bcast to leaves */
1339: for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1340: PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);
1341: PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);
1343: /* Build esf with leaves that are still connected */
1344: esf_nleaves = 0;
1345: for (i=0; i<nleaves; i++) {
1346: j = ilocal ? ilocal[i] : i;
1347: /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1348: with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1349: */
1350: esf_nleaves += (leafdata[j] ? 1 : 0);
1351: }
1352: PetscMalloc1(esf_nleaves,&new_ilocal);
1353: PetscMalloc1(esf_nleaves,&new_iremote);
1354: for (i=n=0; i<nleaves; i++) {
1355: j = ilocal ? ilocal[i] : i;
1356: if (leafdata[j]) {
1357: new_ilocal[n] = j;
1358: new_iremote[n].rank = iremote[i].rank;
1359: new_iremote[n].index = iremote[i].index;
1360: ++n;
1361: }
1362: }
1363: PetscSFCreate(comm,esf);
1364: PetscSFSetFromOptions(*esf);
1365: PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1366: PetscFree2(rootdata,leafmem);
1367: }
1368: PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);
1369: return(0);
1370: }
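/*
   Illustrative sketch (not part of the original sf.c): embed an SF on a few selected roots and
   query which leaves still participate, as suggested in the Note above. The selected indices
   are example values; error checking is omitted for brevity.

     PetscInt       selected[2] = {0,3};
     PetscSF        esf;
     PetscInt       new_nleaves;
     const PetscInt *new_ilocal;

     PetscSFCreateEmbeddedRootSF(sf,2,selected,&esf);
     PetscSFGetGraph(esf,NULL,&new_nleaves,&new_ilocal,NULL);   // surviving leaf locations
     // ... use esf ...
     PetscSFDestroy(&esf);
*/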
1372: /*@C
1373: PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1375: Collective
1377: Input Parameters:
1378: + sf - original star forest
1379: . nselected - number of selected leaves on this process
1380: - selected - indices of the selected leaves on this process
1382: Output Parameter:
1383: . newsf - new star forest
1385: Level: advanced
1387: .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph()
1388: @*/
1389: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1390: {
1391: const PetscSFNode *iremote;
1392: PetscSFNode *new_iremote;
1393: const PetscInt *ilocal;
1394: PetscInt i,nroots,*leaves,*new_ilocal;
1395: MPI_Comm comm;
1396: PetscErrorCode ierr;
1400: PetscSFCheckGraphSet(sf,1);
1404: /* Uniq selected[] and put results in leaves[] */
1405: PetscObjectGetComm((PetscObject)sf,&comm);
1406: PetscMalloc1(nselected,&leaves);
1407: PetscArraycpy(leaves,selected,nselected);
1408: PetscSortedRemoveDupsInt(&nselected,leaves);
1409: if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);
1411: /* Optimize the routine only when sf is set up and hence we can reuse sf's communication pattern */
1412: if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1413: (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);
1414: } else {
1415: PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);
1416: PetscMalloc1(nselected,&new_ilocal);
1417: PetscMalloc1(nselected,&new_iremote);
1418: for (i=0; i<nselected; ++i) {
1419: const PetscInt l = leaves[i];
1420: new_ilocal[i] = ilocal ? ilocal[l] : l;
1421: new_iremote[i].rank = iremote[l].rank;
1422: new_iremote[i].index = iremote[l].index;
1423: }
1424: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);
1425: PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1426: }
1427: PetscFree(leaves);
1428: return(0);
1429: }
1431: /*@C
1432: PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()
1434: Collective on PetscSF
1436: Input Parameters:
1437: + sf - star forest on which to communicate
1438: . unit - data type associated with each node
1439: . rootdata - buffer to broadcast
1440: - op - operation to use for reduction
1442: Output Parameter:
1443: . leafdata - buffer to be reduced with values from each leaf's respective root
1445: Level: intermediate
1447: Notes:
1448: When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1449: are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1450: use PetscSFBcastWithMemTypeBegin() instead.
1451: .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin()
1452: @*/
1453: PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1454: {
1456: PetscMemType rootmtype,leafmtype;
1460: PetscSFSetUp(sf);
1461: if (!sf->vscat.logging) {PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);}
1462: PetscGetMemType(rootdata,&rootmtype);
1463: PetscGetMemType(leafdata,&leafmtype);
1464: (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1465: if (!sf->vscat.logging) {PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);}
1466: return(0);
1467: }
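/*
   Illustrative sketch (not part of the original sf.c): a typical root-to-leaf broadcast with
   MPI_REPLACE, assuming sf already has its graph set and rootdata/leafdata are PetscInt arrays
   sized for the roots/leaves of that graph. Error checking is omitted for brevity.

     PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);
     // ... unrelated local work may overlap with the communication here ...
     PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);
*/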
1469: /*@C
1470: PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()
1472: Collective on PetscSF
1474: Input Parameters:
1475: + sf - star forest on which to communicate
1476: . unit - data type associated with each node
1477: . rootmtype - memory type of rootdata
1478: . rootdata - buffer to broadcast
1479: . leafmtype - memory type of leafdata
1480: - op - operation to use for reduction
1482: Output Parameter:
1483: . leafdata - buffer to be reduced with values from each leaf's respective root
1485: Level: intermediate
1487: .seealso: PetscSFBcastEnd(), PetscSFBcastBegin()
1488: @*/
1489: PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
1490: {
1495: PetscSFSetUp(sf);
1496: if (!sf->vscat.logging) {PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);}
1497: (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1498: if (!sf->vscat.logging) {PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);}
1499: return(0);
1500: }
1502: /*@C
1503: PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()
1505: Collective
1507: Input Parameters:
1508: + sf - star forest
1509: . unit - data type
1510: . rootdata - buffer to broadcast
1511: - op - operation to use for reduction
1513: Output Parameter:
1514: . leafdata - buffer to be reduced with values from each leaf's respective root
1516: Level: intermediate
1518: .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1519: @*/
1520: PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1521: {
1526: if (!sf->vscat.logging) {PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);}
1527: (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op);
1528: if (!sf->vscat.logging) {PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);}
1529: return(0);
1530: }
1532: /*@C
1533: PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1535: Collective
1537: Input Parameters:
1538: + sf - star forest
1539: . unit - data type
1540: . leafdata - values to reduce
1541: - op - reduction operation
1543: Output Parameter:
1544: . rootdata - result of reduction of values from all leaves of each root
1546: Level: intermediate
1548: Notes:
1549: When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1550: are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1551: use PetscSFReduceWithMemTypeBegin() instead.
1553: .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
1554: @*/
1555: PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1556: {
1558: PetscMemType rootmtype,leafmtype;
1562: PetscSFSetUp(sf);
1563: if (!sf->vscat.logging) {PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);}
1564: PetscGetMemType(rootdata,&rootmtype);
1565: PetscGetMemType(leafdata,&leafmtype);
1566: (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1567: if (!sf->vscat.logging) {PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);}
1568: return(0);
1569: }
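/*
   Illustrative sketch (not part of the original sf.c): summing leaf contributions into roots,
   assuming sf already has its graph set and leafdata/rootdata are PetscScalar arrays sized for
   that graph. With MPIU_SUM the leaf values are added into rootdata, so rootdata should be
   initialized (e.g. zeroed) beforehand. Error checking is omitted for brevity.

     PetscSFReduceBegin(sf,MPIU_SCALAR,leafdata,rootdata,MPIU_SUM);
     PetscSFReduceEnd(sf,MPIU_SCALAR,leafdata,rootdata,MPIU_SUM);
*/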
1571: /*@C
1572: PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()
1574: Collective
1576: Input Parameters:
1577: + sf - star forest
1578: . unit - data type
1579: . leafmtype - memory type of leafdata
1580: . leafdata - values to reduce
1581: . rootmtype - memory type of rootdata
1582: - op - reduction operation
1584: Output Parameter:
1585: . rootdata - result of reduction of values from all leaves of each root
1587: Level: intermediate
1589: .seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
1590: @*/
1591: PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1592: {
1597: PetscSFSetUp(sf);
1598: if (!sf->vscat.logging) {PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);}
1599: (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1600: if (!sf->vscat.logging) {PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);}
1601: return(0);
1602: }
1604: /*@C
1605: PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1607: Collective
1609: Input Parameters:
1610: + sf - star forest
1611: . unit - data type
1612: . leafdata - values to reduce
1613: - op - reduction operation
1615: Output Parameter:
1616: . rootdata - result of reduction of values from all leaves of each root
1618: Level: intermediate
1620: .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1621: @*/
1622: PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1623: {
1628: if (!sf->vscat.logging) {PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);}
1629: (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);
1630: if (!sf->vscat.logging) {PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);}
1631: return(0);
1632: }
1634: /*@C
1635: PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1637: Collective
1639: Input Parameters:
1640: + sf - star forest
1641: . unit - data type
1642: . leafdata - leaf values to use in reduction
1643: - op - operation to use for reduction
1645: Output Parameters:
1646: + rootdata - root values to be updated, input state is seen by first process to perform an update
1647: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1649: Level: advanced
1651: Note:
1652: The update is atomic only at the granularity provided by the hardware. Different roots referenced by the same process
1653: may be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1654: not guaranteed across the whole vertex. Therefore, this function is typically used only with primitive types such as
1655: integers.
1657: .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1658: @*/
1659: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1660: {
1662: PetscMemType rootmtype,leafmtype,leafupdatemtype;
1666: PetscSFSetUp(sf);
1667: PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1668: PetscGetMemType(rootdata,&rootmtype);
1669: PetscGetMemType(leafdata,&leafmtype);
1670: PetscGetMemType(leafupdate,&leafupdatemtype);
1671: if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1672: (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);
1673: PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1674: return(0);
1675: }
1677: /*@C
1678: PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1680: Collective
1682: Input Parameters:
1683: + sf - star forest
1684: . unit - data type
1685: . leafdata - leaf values to use in reduction
1686: - op - operation to use for reduction
1688: Output Parameters:
1689: + rootdata - root values to be updated, input state is seen by first process to perform an update
1690: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1692: Level: advanced
1694: .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1695: @*/
1696: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1697: {
1702: PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1703: (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);
1704: PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1705: return(0);
1706: }
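
A hedged sketch of a common fetch-and-add pattern; the helper name and arrays are hypothetical. Each leaf contributes leafones[i] (typically 1) to a per-root counter and receives in leafoffset[i] the counter value at its root immediately before its own update, which yields unique slots per root.

#include <petscsf.h>

/* Hypothetical helper: hand out unique offsets at each root via fetch-and-add.
   rootcounter must be initialized (e.g. to zero) before the call. */
static PetscErrorCode ClaimSlots(PetscSF sf,PetscInt *rootcounter,const PetscInt *leafones,PetscInt *leafoffset)
{
  PetscSFFetchAndOpBegin(sf,MPIU_INT,rootcounter,leafones,leafoffset,MPI_SUM);
  PetscSFFetchAndOpEnd(sf,MPIU_INT,rootcounter,leafones,leafoffset,MPI_SUM);
  /* leafoffset[i] now holds the pre-update counter at the root of leaf i;
     each root's counter has been incremented by the contributions of its leaves */
  return 0;
}
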
1708: /*@C
1709: PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()
1711: Collective
1713: Input Parameter:
1714: . sf - star forest
1716: Output Parameter:
1717: . degree - degree of each root vertex
1719: Level: advanced
1721: Notes:
1722: The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(), hence there is no need to call PetscFree() on it.
1724: .seealso: PetscSFGatherBegin()
1725: @*/
1726: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1727: {
1732: PetscSFCheckGraphSet(sf,1);
1734: if (!sf->degreeknown) {
1735: PetscInt i, nroots = sf->nroots, maxlocal;
1736: if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1737: maxlocal = sf->maxleaf-sf->minleaf+1;
1738: PetscMalloc1(nroots,&sf->degree);
1739: PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1740: for (i=0; i<nroots; i++) sf->degree[i] = 0;
1741: for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1742: PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1743: }
1744: *degree = NULL;
1745: return(0);
1746: }
1748: /*@C
1749: PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()
1751: Collective
1753: Input Parameter:
1754: . sf - star forest
1756: Output Parameter:
1757: . degree - degree of each root vertex
1759: Level: developer
1761: Notes:
1762: The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(), hence there is no need to call PetscFree() on it.
1764: .seealso: PetscSFComputeDegreeBegin()
1765: @*/
1766: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
1767: {
1772: PetscSFCheckGraphSet(sf,1);
1774: if (!sf->degreeknown) {
1775: if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1776: PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1777: PetscFree(sf->degreetmp);
1778: sf->degreeknown = PETSC_TRUE;
1779: }
1780: *degree = sf->degree;
1781: return(0);
1782: }
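
An illustrative sketch with a hypothetical helper: query the degree of every local root and print it. The degree array is owned by the SF, so it is not freed here.

#include <petscsf.h>

/* Hypothetical helper: print the number of leaves attached to each local root. */
static PetscErrorCode PrintRootDegrees(PetscSF sf)
{
  const PetscInt *degree;
  PetscInt        i,nroots;

  PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
  PetscSFComputeDegreeBegin(sf,&degree);
  PetscSFComputeDegreeEnd(sf,&degree);
  for (i=0; i<nroots; i++) PetscPrintf(PETSC_COMM_SELF,"root %D has degree %D\n",i,degree[i]);
  return 0; /* degree is owned by the SF and is freed by PetscSFDestroy() */
}
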
1784: /*@C
1785: PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1786: Each multi-root is assigned the index of the corresponding original root.
1788: Collective
1790: Input Parameters:
1791: + sf - star forest
1792: - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1794: Output Parameters:
1795: + nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1796: - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1798: Level: developer
1800: Notes:
1801: The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1803: .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1804: @*/
1805: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1806: {
1807: PetscSF msf;
1808: PetscInt i, j, k, nroots, nmroots;
1809: PetscErrorCode ierr;
1813: PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
1817: PetscSFGetMultiSF(sf,&msf);
1818: PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);
1819: PetscMalloc1(nmroots, multiRootsOrigNumbering);
1820: for (i=0,j=0,k=0; i<nroots; i++) {
1821: if (!degree[i]) continue;
1822: for (j=0; j<degree[i]; j++,k++) {
1823: (*multiRootsOrigNumbering)[k] = i;
1824: }
1825: }
1826: if (PetscUnlikely(k != nmroots)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Sanity check failed: counted %D multi-root entries but expected %D",k,nmroots);
1827: if (nMultiRoots) *nMultiRoots = nmroots;
1828: return(0);
1829: }
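
A hedged sketch with a hypothetical helper: compute the degrees, then map every multi-root back to the original root it expands, e.g. to interpret a buffer filled by PetscSFGatherBegin()/PetscSFGatherEnd().

#include <petscsf.h>

/* Hypothetical helper: build the multi-root -> original-root map and release it. */
static PetscErrorCode MapMultiRootsToRoots(PetscSF sf)
{
  const PetscInt *degree;
  PetscInt        nmroots,*morig;

  PetscSFComputeDegreeBegin(sf,&degree);
  PetscSFComputeDegreeEnd(sf,&degree);
  PetscSFComputeMultiRootOriginalNumbering(sf,degree,&nmroots,&morig);
  /* morig[k] is the original root index corresponding to multi-root k, 0 <= k < nmroots */
  PetscFree(morig); /* unlike degree, this array is owned by the caller */
  return 0;
}
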
1831: /*@C
1832: PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1834: Collective
1836: Input Parameters:
1837: + sf - star forest
1838: . unit - data type
1839: - leafdata - leaf data to gather to roots
1841: Output Parameter:
1842: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1844: Level: intermediate
1846: .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1847: @*/
1848: PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1849: {
1851: PetscSF multi = NULL;
1855: PetscSFSetUp(sf);
1856: PetscSFGetMultiSF(sf,&multi);
1857: PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);
1858: return(0);
1859: }
1861: /*@C
1862: PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1864: Collective
1866: Input Parameters:
1867: + sf - star forest
1868: . unit - data type
1869: - leafdata - leaf data to gather to roots
1871: Output Parameter:
1872: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1874: Level: intermediate
1876: .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1877: @*/
1878: PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1879: {
1881: PetscSF multi = NULL;
1885: PetscSFGetMultiSF(sf,&multi);
1886: PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);
1887: return(0);
1888: }
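
An illustrative sketch with a hypothetical helper: size the multi-root buffer as the sum of the root degrees, then gather every leaf value into the contiguous segment of its root.

#include <petscsf.h>

/* Hypothetical helper: gather all leaf values, one contiguous segment per root.
   The caller is responsible for freeing *multirootdata. */
static PetscErrorCode GatherLeavesPerRoot(PetscSF sf,const PetscScalar *leafdata,PetscScalar **multirootdata)
{
  const PetscInt *degree;
  PetscInt        i,nroots,nmulti = 0;

  PetscSFComputeDegreeBegin(sf,&degree);
  PetscSFComputeDegreeEnd(sf,&degree);
  PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
  for (i=0; i<nroots; i++) nmulti += degree[i]; /* one unit per leaf of each root */
  PetscMalloc1(nmulti,multirootdata);
  PetscSFGatherBegin(sf,MPIU_SCALAR,leafdata,*multirootdata);
  PetscSFGatherEnd(sf,MPIU_SCALAR,leafdata,*multirootdata);
  return 0;
}
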
1890: /*@C
1891: PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1893: Collective
1895: Input Parameters:
1896: + sf - star forest
1897: . unit - data type
1898: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1900: Output Parameter:
1901: . leafdata - leaf data to be updated with personal data from each respective root
1903: Level: intermediate
1905: .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd()
1906: @*/
1907: PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1908: {
1910: PetscSF multi = NULL;
1914: PetscSFSetUp(sf);
1915: PetscSFGetMultiSF(sf,&multi);
1916: PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE);
1917: return(0);
1918: }
1920: /*@C
1921: PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1923: Collective
1925: Input Parameters:
1926: + sf - star forest
1927: . unit - data type
1928: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1930: Output Parameter:
1931: . leafdata - leaf data to be updated with personal data from each respective root
1933: Level: intermediate
1935: .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterBegin()
1936: @*/
1937: PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1938: {
1940: PetscSF multi = NULL;
1944: PetscSFGetMultiSF(sf,&multi);
1945: PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE);
1946: return(0);
1947: }
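
The inverse pattern, again as a hypothetical sketch: send one distinct unit from each root's segment of the multi-root buffer back to the corresponding leaf, using the same layout as the gather sketch above.

#include <petscsf.h>

/* Hypothetical helper: scatter one unit per leaf from the multi-root buffer to the leaves. */
static PetscErrorCode ScatterPerLeaf(PetscSF sf,const PetscScalar *multirootdata,PetscScalar *leafdata)
{
  PetscSFScatterBegin(sf,MPIU_SCALAR,multirootdata,leafdata);
  PetscSFScatterEnd(sf,MPIU_SCALAR,multirootdata,leafdata);
  return 0;
}
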
1949: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1950: {
1951: PetscInt i, n, nleaves;
1952: const PetscInt *ilocal = NULL;
1953: PetscHSetI seen;
1954: PetscErrorCode ierr;
1957: if (PetscDefined(USE_DEBUG)) {
1958: PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);
1959: PetscHSetICreate(&seen);
1960: for (i = 0; i < nleaves; i++) {
1961: const PetscInt leaf = ilocal ? ilocal[i] : i;
1962: PetscHSetIAdd(seen,leaf);
1963: }
1964: PetscHSetIGetSize(seen,&n);
1965: if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1966: PetscHSetIDestroy(&seen);
1967: }
1968: return(0);
1969: }
1971: /*@
1972: PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view
1974: Input Parameters:
1975: + sfA - The first PetscSF
1976: - sfB - The second PetscSF
1978: Output Parameters:
1979: . sfBA - The composite SF
1981: Level: developer
1983: Notes:
1984: Currently, the two SFs must be defined on congruent communicators and they must be true star
1985: forests, i.e. no leaf is connected to more than one root.
1987: sfA's leaf space and sfB's root space may partially overlap. The composition builds
1988: a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
1989: nodes (roots or leaves) are not in sfBA. On the connected nodes, doing a Bcast on the new SF
1990: is equivalent to doing a Bcast on sfA followed by a Bcast on sfB.
1992: .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
1993: @*/
1994: PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1995: {
1996: PetscErrorCode ierr;
1997: const PetscSFNode *remotePointsA,*remotePointsB;
1998: PetscSFNode *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
1999: const PetscInt *localPointsA,*localPointsB;
2000: PetscInt *localPointsBA;
2001: PetscInt i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
2002: PetscBool denseB;
2006: PetscSFCheckGraphSet(sfA,1);
2008: PetscSFCheckGraphSet(sfB,2);
2011: PetscSFCheckLeavesUnique_Private(sfA);
2012: PetscSFCheckLeavesUnique_Private(sfB);
2014: PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);
2015: PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);
2016: if (localPointsA) {
2017: PetscMalloc1(numRootsB,&reorderedRemotePointsA);
2018: for (i=0; i<numRootsB; i++) {
2019: reorderedRemotePointsA[i].rank = -1;
2020: reorderedRemotePointsA[i].index = -1;
2021: }
2022: for (i=0; i<numLeavesA; i++) {
2023: if (localPointsA[i] >= numRootsB) continue;
2024: reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
2025: }
2026: remotePointsA = reorderedRemotePointsA;
2027: }
2028: PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);
2029: PetscMalloc1(maxleaf-minleaf+1,&leafdataB);
2030: PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);
2031: PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);
2032: PetscFree(reorderedRemotePointsA);
2034: denseB = (PetscBool)!localPointsB;
2035: for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
2036: if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
2037: else numLeavesBA++;
2038: }
2039: if (denseB) {
2040: localPointsBA = NULL;
2041: remotePointsBA = leafdataB;
2042: } else {
2043: PetscMalloc1(numLeavesBA,&localPointsBA);
2044: PetscMalloc1(numLeavesBA,&remotePointsBA);
2045: for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
2046: const PetscInt l = localPointsB ? localPointsB[i] : i;
2048: if (leafdataB[l-minleaf].rank == -1) continue;
2049: remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
2050: localPointsBA[numLeavesBA] = l;
2051: numLeavesBA++;
2052: }
2053: PetscFree(leafdataB);
2054: }
2055: PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
2056: PetscSFSetFromOptions(*sfBA);
2057: PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
2058: return(0);
2059: }
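
A minimal usage sketch with a hypothetical helper: compose two SFs so that broadcasting on the result is equivalent to broadcasting on sfA and then on sfB over the connected nodes; the caller owns and destroys the composite SF.

#include <petscsf.h>

/* Hypothetical helper: build, use, and destroy the composition of two star forests. */
static PetscErrorCode UseComposition(PetscSF sfA,PetscSF sfB)
{
  PetscSF sfBA;

  PetscSFCompose(sfA,sfB,&sfBA);
  /* e.g. PetscSFBcastBegin()/PetscSFBcastEnd() on sfBA here */
  PetscSFDestroy(&sfBA);
  return 0;
}
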
2061: /*@
2062: PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one
2064: Input Parameters:
2065: + sfA - The first PetscSF
2066: - sfB - The second PetscSF
2068: Output Parameters:
2069: . sfBA - The composite SF.
2071: Level: developer
2073: Notes:
2074: Currently, the two SFs must be defined on congruent communicators and they must be true star
2075: forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
2076: second SF must have a degree of 1, i.e., no roots have more than one leaf connected.
2078: sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
2079: a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
2080: roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
2081: a Reduce on sfB, on connected roots.
2083: .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
2084: @*/
2085: PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
2086: {
2087: PetscErrorCode ierr;
2088: const PetscSFNode *remotePointsA,*remotePointsB;
2089: PetscSFNode *remotePointsBA;
2090: const PetscInt *localPointsA,*localPointsB;
2091: PetscSFNode *reorderedRemotePointsA = NULL;
2092: PetscInt i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
2093: MPI_Op op;
2094: #if defined(PETSC_USE_64BIT_INDICES)
2095: PetscBool iswin;
2096: #endif
2100: PetscSFCheckGraphSet(sfA,1);
2102: PetscSFCheckGraphSet(sfB,2);
2105: PetscSFCheckLeavesUnique_Private(sfA);
2106: PetscSFCheckLeavesUnique_Private(sfB);
2108: PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);
2109: PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);
2111: /* TODO: Check that the roots of sfB have degree 1 */
2112: /* Once that check is implemented, we can replace MPI_MAXLOC
2113: with MPI_REPLACE, since the two have the same effect when every root has degree 1.
2114: We use MPI_MAXLOC only to have a deterministic output from this routine if
2115: the root condition is not met.
2116: */
2117: op = MPI_MAXLOC;
2118: #if defined(PETSC_USE_64BIT_INDICES)
2119: /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2120: PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);
2121: if (iswin) op = MPI_REPLACE;
2122: #endif
2124: PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);
2125: PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);
2126: for (i=0; i<maxleaf - minleaf + 1; i++) {
2127: reorderedRemotePointsA[i].rank = -1;
2128: reorderedRemotePointsA[i].index = -1;
2129: }
2130: if (localPointsA) {
2131: for (i=0; i<numLeavesA; i++) {
2132: if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2133: reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2134: }
2135: } else {
2136: for (i=0; i<numLeavesA; i++) {
2137: if (i > maxleaf || i < minleaf) continue;
2138: reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2139: }
2140: }
2142: PetscMalloc1(numRootsB,&localPointsBA);
2143: PetscMalloc1(numRootsB,&remotePointsBA);
2144: for (i=0; i<numRootsB; i++) {
2145: remotePointsBA[i].rank = -1;
2146: remotePointsBA[i].index = -1;
2147: }
2149: PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
2150: PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
2151: PetscFree(reorderedRemotePointsA);
2152: for (i=0,numLeavesBA=0; i<numRootsB; i++) {
2153: if (remotePointsBA[i].rank == -1) continue;
2154: remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
2155: remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2156: localPointsBA[numLeavesBA] = i;
2157: numLeavesBA++;
2158: }
2159: PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
2160: PetscSFSetFromOptions(*sfBA);
2161: PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
2162: return(0);
2163: }
2165: /*
2166: PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
2168: Input Parameters:
2169: . sf - The global PetscSF
2171: Output Parameters:
2172: . out - The local PetscSF
2173: */
2174: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
2175: {
2176: MPI_Comm comm;
2177: PetscMPIInt myrank;
2178: const PetscInt *ilocal;
2179: const PetscSFNode *iremote;
2180: PetscInt i,j,nroots,nleaves,lnleaves,*lilocal;
2181: PetscSFNode *liremote;
2182: PetscSF lsf;
2183: PetscErrorCode ierr;
2187: if (sf->ops->CreateLocalSF) {
2188: (*sf->ops->CreateLocalSF)(sf,out);
2189: } else {
2190: /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2191: PetscObjectGetComm((PetscObject)sf,&comm);
2192: MPI_Comm_rank(comm,&myrank);
2194: /* Find out local edges and build a local SF */
2195: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
2196: for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2197: PetscMalloc1(lnleaves,&lilocal);
2198: PetscMalloc1(lnleaves,&liremote);
2200: for (i=j=0; i<nleaves; i++) {
2201: if (iremote[i].rank == (PetscInt)myrank) {
2202: lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2203: liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
2204: liremote[j].index = iremote[i].index;
2205: j++;
2206: }
2207: }
2208: PetscSFCreate(PETSC_COMM_SELF,&lsf);
2209: PetscSFSetFromOptions(lsf);
2210: PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);
2211: PetscSFSetUp(lsf);
2212: *out = lsf;
2213: }
2214: return(0);
2215: }
2217: /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2218: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2219: {
2220: PetscErrorCode ierr;
2221: PetscMemType rootmtype,leafmtype;
2225: PetscSFSetUp(sf);
2226: PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);
2227: PetscGetMemType(rootdata,&rootmtype);
2228: PetscGetMemType(leafdata,&leafmtype);
2229: if (sf->ops->BcastToZero) {
2230: (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);
2231: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2232: PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);
2233: return(0);
2234: }