Actual source code: sf.c
petsc-3.14.6 2021-03-30
1: #include <petsc/private/sfimpl.h>
2: #include <petsc/private/hashseti.h>
3: #include <petscctable.h>
5: #if defined(PETSC_HAVE_CUDA)
6: #include <cuda_runtime.h>
7: #endif
9: #if defined(PETSC_HAVE_HIP)
10: #include <hip/hip_runtime.h>
11: #endif
13: #if defined(PETSC_USE_DEBUG)
14: # define PetscSFCheckGraphSet(sf,arg) do { \
15: if (PetscUnlikely(!(sf)->graphset)) \
16: SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
17: } while (0)
18: #else
19: # define PetscSFCheckGraphSet(sf,arg) do {} while (0)
20: #endif
22: const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
24: PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type)
25: {
28: *type = PETSC_MEMTYPE_HOST;
29: #if defined(PETSC_HAVE_CUDA)
30: if (PetscCUDAInitialized && data) {
31: cudaError_t cerr;
32: struct cudaPointerAttributes attr;
33: enum cudaMemoryType mtype;
34: cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */
35: cudaGetLastError(); /* Reset the last error */
36: #if (CUDART_VERSION < 10000)
37: mtype = attr.memoryType;
38: #else
39: mtype = attr.type;
40: #endif
41: if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
42: }
43: #endif
45: #if defined(PETSC_HAVE_HIP)
46: if (PetscHIPInitialized && data) {
47: hipError_t cerr;
48: struct hipPointerAttribute_t attr;
49: enum hipMemoryType mtype;
50:     cerr = hipPointerGetAttributes(&attr,data); /* Do not check error since hipPointerGetAttributes() returns hipErrorInvalidValue when passed a host pointer */
51: hipGetLastError(); /* Reset the last error */
52: mtype = attr.memoryType;
53: if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
54: }
55: #endif
56: return(0);
57: }
59: /*@
60: PetscSFCreate - create a star forest communication context
62: Collective
64: Input Arguments:
65: . comm - communicator on which the star forest will operate
67: Output Arguments:
68: . sf - new star forest context
70: Options Database Keys:
71: + -sf_type basic    - Use MPI persistent Isend/Irecv for communication (Default)
72: . -sf_type window   - Use MPI-3 one-sided window for communication
73: - -sf_type neighbor - Use MPI-3 neighborhood collectives for communication
75: Level: intermediate
77: Notes:
78:    When one knows the communication graph matches one of the predefined patterns, such as MPI_Alltoall, MPI_Allgatherv,
79:    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
80:    SFs are optimized and have better performance than general SFs.
82: .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
83: @*/
84: PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
85: {
87: PetscSF b;
91: PetscSFInitializePackage();
93: PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);
95: b->nroots = -1;
96: b->nleaves = -1;
97: b->minleaf = PETSC_MAX_INT;
98: b->maxleaf = PETSC_MIN_INT;
99: b->nranks = -1;
100: b->rankorder = PETSC_TRUE;
101: b->ingroup = MPI_GROUP_NULL;
102: b->outgroup = MPI_GROUP_NULL;
103: b->graphset = PETSC_FALSE;
104: #if defined(PETSC_HAVE_DEVICE)
105: b->use_gpu_aware_mpi = use_gpu_aware_mpi;
106: b->use_stream_aware_mpi = PETSC_FALSE;
107: b->use_default_stream = PETSC_TRUE; /* The assumption is true for PETSc internal use of SF */
108: #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda */
109: b->backend = PETSCSF_BACKEND_KOKKOS;
110: #elif defined(PETSC_HAVE_CUDA)
111: b->backend = PETSCSF_BACKEND_CUDA;
112: #endif
113: #endif
114: *sf = b;
115: return(0);
116: }
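/*
   A minimal usage sketch (illustrative only, not part of this file): the typical lifecycle is
   create, set the graph, set up, communicate, destroy. Here every rank owns one root and has two
   leaves, both referencing root 0 on rank 0; error checking is omitted for brevity.

     PetscSF     sf;
     PetscSFNode remote[2];
     PetscInt    rootdata[1] = {42},leafdata[2];

     remote[0].rank = 0; remote[0].index = 0;
     remote[1].rank = 0; remote[1].index = 0;
     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     PetscSFSetFromOptions(sf);
     PetscSFSetGraph(sf,1,2,NULL,PETSC_COPY_VALUES,remote,PETSC_COPY_VALUES);
     PetscSFSetUp(sf);
     PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);
     PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);
     PetscSFDestroy(&sf);
*/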
118: /*@
119: PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
121: Collective
123: Input Arguments:
124: . sf - star forest
126: Level: advanced
128: .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
129: @*/
130: PetscErrorCode PetscSFReset(PetscSF sf)
131: {
136: if (sf->ops->Reset) {(*sf->ops->Reset)(sf);}
137: sf->nroots = -1;
138: sf->nleaves = -1;
139: sf->minleaf = PETSC_MAX_INT;
140: sf->maxleaf = PETSC_MIN_INT;
141: sf->mine = NULL;
142: sf->remote = NULL;
143: sf->graphset = PETSC_FALSE;
144: PetscFree(sf->mine_alloc);
145: PetscFree(sf->remote_alloc);
146: sf->nranks = -1;
147: PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);
148: #if defined(PETSC_HAVE_DEVICE)
149: for (PetscInt i=0; i<2; i++) {PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);}
150: #endif
151: sf->degreeknown = PETSC_FALSE;
152: PetscFree(sf->degree);
153: if (sf->ingroup != MPI_GROUP_NULL) {MPI_Group_free(&sf->ingroup);}
154: if (sf->outgroup != MPI_GROUP_NULL) {MPI_Group_free(&sf->outgroup);}
155: PetscSFDestroy(&sf->multi);
156: PetscLayoutDestroy(&sf->map);
157: sf->setupcalled = PETSC_FALSE;
158: return(0);
159: }
161: /*@C
162: PetscSFSetType - Set the PetscSF communication implementation
164: Collective on PetscSF
166: Input Parameters:
167: + sf - the PetscSF context
168: - type - a known method
170: Options Database Key:
171: . -sf_type <type> - Sets the method; use -help for a list
172: of available methods (for instance, window, basic, neighbor)
174: Notes:
175: See "include/petscsf.h" for available methods (for instance)
176: + PETSCSFWINDOW - MPI-2/3 one-sided
177: - PETSCSFBASIC - basic implementation using MPI-1 two-sided
179: Level: intermediate
181: .seealso: PetscSFType, PetscSFCreate()
182: @*/
183: PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
184: {
185: PetscErrorCode ierr,(*r)(PetscSF);
186: PetscBool match;
192: PetscObjectTypeCompare((PetscObject)sf,type,&match);
193: if (match) return(0);
195: PetscFunctionListFind(PetscSFList,type,&r);
196: if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
197: /* Destroy the previous PetscSF implementation context */
198: if (sf->ops->Destroy) {(*(sf)->ops->Destroy)(sf);}
199: PetscMemzero(sf->ops,sizeof(*sf->ops));
200: PetscObjectChangeTypeName((PetscObject)sf,type);
201: (*r)(sf);
202: return(0);
203: }
205: /*@C
206: PetscSFGetType - Get the PetscSF communication implementation
208: Not Collective
210: Input Parameter:
211: . sf - the PetscSF context
213: Output Parameter:
214: . type - the PetscSF type name
216: Level: intermediate
218: .seealso: PetscSFSetType(), PetscSFCreate()
219: @*/
220: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
221: {
225: *type = ((PetscObject)sf)->type_name;
226: return(0);
227: }
229: /*@
230: PetscSFDestroy - destroy star forest
232: Collective
234: Input Arguments:
235: . sf - address of star forest
237: Level: intermediate
239: .seealso: PetscSFCreate(), PetscSFReset()
240: @*/
241: PetscErrorCode PetscSFDestroy(PetscSF *sf)
242: {
246: if (!*sf) return(0);
248: if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; return(0);}
249: PetscSFReset(*sf);
250: if ((*sf)->ops->Destroy) {(*(*sf)->ops->Destroy)(*sf);}
251: PetscHeaderDestroy(sf);
252: return(0);
253: }
255: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
256: {
257: PetscInt i, nleaves;
258: PetscMPIInt size;
259: const PetscInt *ilocal;
260: const PetscSFNode *iremote;
261: PetscErrorCode ierr;
264: if (!sf->graphset || !PetscDefined(USE_DEBUG)) return(0);
265: PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);
266: MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
267: for (i = 0; i < nleaves; i++) {
268: const PetscInt rank = iremote[i].rank;
269: const PetscInt remote = iremote[i].index;
270: const PetscInt leaf = ilocal ? ilocal[i] : i;
271: if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
272: if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
273: if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
274: }
275: return(0);
276: }
281: /*@
282: PetscSFSetUp - set up communication structures
284: Collective
286: Input Arguments:
287: . sf - star forest communication object
289: Level: beginner
291: .seealso: PetscSFSetFromOptions(), PetscSFSetType()
292: @*/
293: PetscErrorCode PetscSFSetUp(PetscSF sf)
294: {
299: PetscSFCheckGraphSet(sf,1);
300: if (sf->setupcalled) return(0);
301: PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);
302: PetscSFCheckGraphValid_Private(sf);
303: if (!((PetscObject)sf)->type_name) {PetscSFSetType(sf,PETSCSFBASIC);} /* Zero all sf->ops */
304: if (sf->ops->SetUp) {(*sf->ops->SetUp)(sf);}
305: #if defined(PETSC_HAVE_CUDA)
306: if (sf->backend == PETSCSF_BACKEND_CUDA) {
307: sf->ops->Malloc = PetscSFMalloc_Cuda;
308: sf->ops->Free = PetscSFFree_Cuda;
309: }
310: #endif
312: #if defined(PETSC_HAVE_KOKKOS)
313: if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
314: sf->ops->Malloc = PetscSFMalloc_Kokkos;
315: sf->ops->Free = PetscSFFree_Kokkos;
316: }
317: #endif
318: PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);
319: sf->setupcalled = PETSC_TRUE;
320: return(0);
321: }
323: /*@
324: PetscSFSetFromOptions - set PetscSF options using the options database
326: Logically Collective
328: Input Arguments:
329: . sf - star forest
331: Options Database Keys:
332: + -sf_type - implementation type, see PetscSFSetType()
333: . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
334: . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
335: use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
336: If true, this option only works with -use_gpu_aware_mpi 1.
337: . -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
338: If true, this option only works with -use_gpu_aware_mpi 1.
340: - -sf_backend cuda | kokkos - Select the device backend SF uses. Currently SF has two backends: cuda and kokkos.
341:                               On CUDA devices, one can choose cuda or kokkos, with the default being cuda. On other devices,
342:                               the only available backend is kokkos.
344: Level: intermediate
345: @*/
346: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
347: {
348: PetscSFType deft;
349: char type[256];
351: PetscBool flg;
355: PetscObjectOptionsBegin((PetscObject)sf);
356: deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
357: PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);
358: PetscSFSetType(sf,flg ? type : deft);
359: PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);
360: #if defined(PETSC_HAVE_DEVICE)
361: {
362: char backendstr[32] = {0};
363: PetscBool isCuda = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
364: /* Change the defaults set in PetscSFCreate() with command line options */
365: PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);
366: PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);
367: PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);
368: PetscStrcasecmp("cuda",backendstr,&isCuda);
369: PetscStrcasecmp("kokkos",backendstr,&isKokkos);
370: #if defined(PETSC_HAVE_CUDA)
371: if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
372: else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
373: else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported on cuda devices. You may choose cuda or kokkos (if installed)", backendstr);
374: #elif defined(PETSC_HAVE_KOKKOS)
375: if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
376: #endif
377: }
378: #endif
379: if (sf->ops->SetFromOptions) {(*sf->ops->SetFromOptions)(PetscOptionsObject,sf);}
380: PetscOptionsEnd();
381: return(0);
382: }
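/*
   A sketch of typical command-line usage consuming the options registered above (illustrative
   only; availability of the kokkos backend depends on how PETSc was configured):

     ./app -sf_type neighbor -sf_rank_order 1 -sf_backend kokkos -sf_use_default_stream 1
*/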
384: /*@
385: PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
387: Logically Collective
389: Input Arguments:
390: + sf - star forest
391: - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
393: Level: advanced
395: .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
396: @*/
397: PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
398: {
402: if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
403: sf->rankorder = flg;
404: return(0);
405: }
407: /*@
408: PetscSFSetGraph - Set a parallel star forest
410: Collective
412: Input Arguments:
413: + sf - star forest
414: . nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
415: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
416: . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
417: during setup in debug mode)
418: . localmode - copy mode for ilocal
419: . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
420: during setup in debug mode)
421: - remotemode - copy mode for iremote
423: Level: intermediate
425: Notes:
426: In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode
428:    Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
429:    encode contiguous storage. In that case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
430:    needed.
432: Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
433: indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).
435: .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
436: @*/
437: PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
438: {
445: if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
446: if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);
448: if (sf->nroots >= 0) { /* Reset only if graph already set */
449: PetscSFReset(sf);
450: }
452: PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);
454: sf->nroots = nroots;
455: sf->nleaves = nleaves;
457: if (nleaves && ilocal) {
458: PetscInt i;
459: PetscInt minleaf = PETSC_MAX_INT;
460: PetscInt maxleaf = PETSC_MIN_INT;
461: int contiguous = 1;
462: for (i=0; i<nleaves; i++) {
463: minleaf = PetscMin(minleaf,ilocal[i]);
464: maxleaf = PetscMax(maxleaf,ilocal[i]);
465: contiguous &= (ilocal[i] == i);
466: }
467: sf->minleaf = minleaf;
468: sf->maxleaf = maxleaf;
469: if (contiguous) {
470: if (localmode == PETSC_OWN_POINTER) {
471: PetscFree(ilocal);
472: }
473: ilocal = NULL;
474: }
475: } else {
476: sf->minleaf = 0;
477: sf->maxleaf = nleaves - 1;
478: }
480: if (ilocal) {
481: switch (localmode) {
482: case PETSC_COPY_VALUES:
483: PetscMalloc1(nleaves,&sf->mine_alloc);
484: PetscArraycpy(sf->mine_alloc,ilocal,nleaves);
485: sf->mine = sf->mine_alloc;
486: break;
487: case PETSC_OWN_POINTER:
488: sf->mine_alloc = (PetscInt*)ilocal;
489: sf->mine = sf->mine_alloc;
490: break;
491: case PETSC_USE_POINTER:
492: sf->mine_alloc = NULL;
493: sf->mine = (PetscInt*)ilocal;
494: break;
495: default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
496: }
497: }
499: switch (remotemode) {
500: case PETSC_COPY_VALUES:
501: PetscMalloc1(nleaves,&sf->remote_alloc);
502: PetscArraycpy(sf->remote_alloc,iremote,nleaves);
503: sf->remote = sf->remote_alloc;
504: break;
505: case PETSC_OWN_POINTER:
506: sf->remote_alloc = (PetscSFNode*)iremote;
507: sf->remote = sf->remote_alloc;
508: break;
509: case PETSC_USE_POINTER:
510: sf->remote_alloc = NULL;
511: sf->remote = (PetscSFNode*)iremote;
512: break;
513: default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
514: }
516: PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);
517: sf->graphset = PETSC_TRUE;
518: return(0);
519: }
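/*
   An illustrative sketch (hypothetical values, not from a PETSc example): this rank owns 4 roots
   and has 2 leaves stored at the non-contiguous local locations 3 and 7, both referencing roots on
   rank 0. The arrays are heap-allocated and handed over with PETSC_OWN_POINTER, so the SF is
   responsible for freeing them.

     PetscInt    *ilocal;
     PetscSFNode *iremote;

     PetscMalloc1(2,&ilocal);
     PetscMalloc1(2,&iremote);
     ilocal[0] = 3; iremote[0].rank = 0; iremote[0].index = 1;
     ilocal[1] = 7; iremote[1].rank = 0; iremote[1].index = 2;
     PetscSFSetGraph(sf,4,2,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);
*/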
521: /*@
522: PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern
524: Collective
526: Input Parameters:
527: + sf - The PetscSF
528: . map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
529: - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL
531: Notes:
532: It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
533:   n and N are the local and global sizes of x respectively.
535:   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph such that a Bcast on it copies x to
536:   sequential vectors y on all ranks.
538:   With PETSCSF_PATTERN_GATHER, the routine creates a graph such that a Bcast on it copies x to a
539:   sequential vector y on rank 0.
541:   In the above cases, entries of x are roots and entries of y are leaves.
543:   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is the size of sf's communicator. The routine
544:   creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
545:   of rank j, where 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that this does
546:   not mean one cannot send multiple items. One just needs to create a new MPI datatype for the multiple data
547:   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.
549: In this case, roots and leaves are symmetric.
551: Level: intermediate
552: @*/
553: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
554: {
555: MPI_Comm comm;
556: PetscInt n,N,res[2];
557: PetscMPIInt rank,size;
558: PetscSFType type;
562: PetscObjectGetComm((PetscObject)sf, &comm);
563: if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
564: MPI_Comm_rank(comm,&rank);
565: MPI_Comm_size(comm,&size);
567: if (pattern == PETSCSF_PATTERN_ALLTOALL) {
568: type = PETSCSFALLTOALL;
569: PetscLayoutCreate(comm,&sf->map);
570: PetscLayoutSetLocalSize(sf->map,size);
571: PetscLayoutSetSize(sf->map,((PetscInt)size)*size);
572: PetscLayoutSetUp(sf->map);
573: } else {
574: PetscLayoutGetLocalSize(map,&n);
575: PetscLayoutGetSize(map,&N);
576: res[0] = n;
577: res[1] = -n;
578:     /* Check if n is the same over all ranks so that we can optimize for that case */
579: MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);
580: if (res[0] == -res[1]) { /* same n */
581: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
582: } else {
583: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
584: }
585: PetscLayoutReference(map,&sf->map);
586: }
587: PetscSFSetType(sf,type);
589: sf->pattern = pattern;
590: sf->mine = NULL; /* Contiguous */
592: /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
593: Also set other easy stuff.
594: */
595: if (pattern == PETSCSF_PATTERN_ALLGATHER) {
596: sf->nleaves = N;
597: sf->nroots = n;
598: sf->nranks = size;
599: sf->minleaf = 0;
600: sf->maxleaf = N - 1;
601: } else if (pattern == PETSCSF_PATTERN_GATHER) {
602: sf->nleaves = rank ? 0 : N;
603: sf->nroots = n;
604: sf->nranks = rank ? 0 : size;
605: sf->minleaf = 0;
606: sf->maxleaf = rank ? -1 : N - 1;
607: } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
608: sf->nleaves = size;
609: sf->nroots = size;
610: sf->nranks = size;
611: sf->minleaf = 0;
612: sf->maxleaf = size - 1;
613: }
614: sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
615: sf->graphset = PETSC_TRUE;
616: return(0);
617: }
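/*
   A sketch of the ALLGATHER pattern (illustrative only; assumes an existing MPI vector x): a Bcast
   on the resulting SF copies the distributed entries of x into a full-length array on every rank.

     PetscSF     sf;
     PetscLayout map;

     VecGetLayout(x,&map);
     PetscSFCreate(PetscObjectComm((PetscObject)x),&sf);
     PetscSFSetGraphWithPattern(sf,map,PETSCSF_PATTERN_ALLGATHER);
     PetscSFSetUp(sf);
     .. PetscSFBcastBegin/End() with the local array of x as rootdata ..
     PetscSFDestroy(&sf);
*/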
619: /*@
620: PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
622: Collective
624: Input Arguments:
626: . sf - star forest to invert
628: Output Arguments:
629: . isf - inverse of sf
630: Level: advanced
632: Notes:
633: All roots must have degree 1.
635: The local space may be a permutation, but cannot be sparse.
637: .seealso: PetscSFSetGraph()
638: @*/
639: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
640: {
642: PetscMPIInt rank;
643: PetscInt i,nroots,nleaves,maxlocal,count,*newilocal;
644: const PetscInt *ilocal;
645: PetscSFNode *roots,*leaves;
649: PetscSFCheckGraphSet(sf,1);
652: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);
653: maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
655: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
656: PetscMalloc2(nroots,&roots,maxlocal,&leaves);
657: for (i=0; i<maxlocal; i++) {
658: leaves[i].rank = rank;
659: leaves[i].index = i;
660: }
661: for (i=0; i <nroots; i++) {
662: roots[i].rank = -1;
663: roots[i].index = -1;
664: }
665: PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);
666: PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);
668: /* Check whether our leaves are sparse */
669: for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
670: if (count == nroots) newilocal = NULL;
671: else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
672: PetscMalloc1(count,&newilocal);
673: for (i=0,count=0; i<nroots; i++) {
674: if (roots[i].rank >= 0) {
675: newilocal[count] = i;
676: roots[count].rank = roots[i].rank;
677: roots[count].index = roots[i].index;
678: count++;
679: }
680: }
681: }
683: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);
684: PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);
685: PetscFree2(roots,leaves);
686: return(0);
687: }
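/*
   A sketch of using the inverse SF (illustrative only; the data array names are hypothetical):
   since every root of sf has degree 1, a Bcast on the inverse moves data attached to the leaves of
   sf back to the roots of sf, i.e. the leaf space of sf becomes the root space of isf.

     PetscSF isf;

     PetscSFCreateInverseSF(sf,&isf);
     PetscSFBcastBegin(isf,MPIU_INT,dataonleavesofsf,dataonrootsofsf);
     PetscSFBcastEnd(isf,MPIU_INT,dataonleavesofsf,dataonrootsofsf);
     PetscSFDestroy(&isf);
*/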
689: /*@
690: PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
692: Collective
694: Input Arguments:
695: + sf - communication object to duplicate
696: - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
698: Output Arguments:
699: . newsf - new communication object
701: Level: beginner
703: .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
704: @*/
705: PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
706: {
707: PetscSFType type;
714: PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);
715: PetscSFGetType(sf,&type);
716: if (type) {PetscSFSetType(*newsf,type);}
717: if (opt == PETSCSF_DUPLICATE_GRAPH) {
718: PetscSFCheckGraphSet(sf,1);
719: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
720: PetscInt nroots,nleaves;
721: const PetscInt *ilocal;
722: const PetscSFNode *iremote;
723: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
724: PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);
725: } else {
726: PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);
727: }
728: }
729: #if defined(PETSC_HAVE_DEVICE)
730: (*newsf)->backend = sf->backend;
731: (*newsf)->use_default_stream = sf->use_default_stream;
732: (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi;
733: (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
734: #endif
735: if (sf->ops->Duplicate) {(*sf->ops->Duplicate)(sf,opt,*newsf);}
736: /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
737: return(0);
738: }
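/*
   A sketch (illustrative only): duplicate only the configuration (type, options) and attach a
   different graph to the copy afterwards.

     PetscSF newsf;

     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&newsf);
     PetscSFSetGraph(newsf,nroots,nleaves,NULL,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);
     PetscSFSetUp(newsf);
*/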
740: /*@C
741: PetscSFGetGraph - Get the graph specifying a parallel star forest
743: Not Collective
745: Input Arguments:
746: . sf - star forest
748: Output Arguments:
749: + nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
750: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
751: . ilocal - locations of leaves in leafdata buffers
752: - iremote - remote locations of root vertices for each leaf on the current process
754: Notes:
755: We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
757: When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
758: want to update the graph, you must call PetscSFSetGraph after modifying the iremote array.
760: Level: intermediate
762: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
763: @*/
764: PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
765: {
770: if (sf->ops->GetGraph) {
771: (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);
772: } else {
773: if (nroots) *nroots = sf->nroots;
774: if (nleaves) *nleaves = sf->nleaves;
775: if (ilocal) *ilocal = sf->mine;
776: if (iremote) *iremote = sf->remote;
777: }
778: return(0);
779: }
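/*
   A sketch of inspecting the graph (illustrative only): the returned arrays are owned by the SF
   and must not be freed by the caller.

     PetscInt          nroots,nleaves,i,leaf;
     const PetscInt    *ilocal;
     const PetscSFNode *iremote;

     PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
     for (i=0; i<nleaves; i++) {
       leaf = ilocal ? ilocal[i] : i;   .. leaf location references root iremote[i].index on rank iremote[i].rank ..
     }
*/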
781: /*@
782: PetscSFGetLeafRange - Get the active leaf ranges
784: Not Collective
786: Input Arguments:
787: . sf - star forest
789: Output Arguments:
790: + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
791: - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.
793: Level: developer
795: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
796: @*/
797: PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
798: {
801: PetscSFCheckGraphSet(sf,1);
802: if (minleaf) *minleaf = sf->minleaf;
803: if (maxleaf) *maxleaf = sf->maxleaf;
804: return(0);
805: }
807: /*@C
808: PetscSFViewFromOptions - View from Options
810: Collective on PetscSF
812: Input Parameters:
813: + A - the star forest
814: . obj - Optional object
815: - name - command line option
817: Level: intermediate
818: .seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
819: @*/
820: PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
821: {
826: PetscObjectViewFromOptions((PetscObject)A,obj,name);
827: return(0);
828: }
830: /*@C
831: PetscSFView - view a star forest
833: Collective
835: Input Arguments:
836: + sf - star forest
837: - viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
839: Level: beginner
841: .seealso: PetscSFCreate(), PetscSFSetGraph()
842: @*/
843: PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
844: {
845: PetscErrorCode ierr;
846: PetscBool iascii;
847: PetscViewerFormat format;
851: if (!viewer) {PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);}
854: if (sf->graphset) {PetscSFSetUp(sf);}
855: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
856: if (iascii) {
857: PetscMPIInt rank;
858: PetscInt ii,i,j;
860: PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);
861: PetscViewerASCIIPushTab(viewer);
862: if (sf->ops->View) {(*sf->ops->View)(sf,viewer);}
863: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
864: if (!sf->graphset) {
865: PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");
866: PetscViewerASCIIPopTab(viewer);
867: return(0);
868: }
869: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
870: PetscViewerASCIIPushSynchronized(viewer);
871: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);
872: for (i=0; i<sf->nleaves; i++) {
873: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);
874: }
875: PetscViewerFlush(viewer);
876: PetscViewerGetFormat(viewer,&format);
877: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
878: PetscMPIInt *tmpranks,*perm;
879: PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);
880: PetscArraycpy(tmpranks,sf->ranks,sf->nranks);
881: for (i=0; i<sf->nranks; i++) perm[i] = i;
882: PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);
883: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);
884: for (ii=0; ii<sf->nranks; ii++) {
885: i = perm[ii];
886: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);
887: for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
888: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);
889: }
890: }
891: PetscFree2(tmpranks,perm);
892: }
893: PetscViewerFlush(viewer);
894: PetscViewerASCIIPopSynchronized(viewer);
895: }
896: PetscViewerASCIIPopTab(viewer);
897: }
898: return(0);
899: }
901: /*@C
902: PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
904: Not Collective
906: Input Arguments:
907: . sf - star forest
909: Output Arguments:
910: + nranks - number of ranks referenced by local part
911: . ranks - array of ranks
912: . roffset - offset in rmine/rremote for each rank (length nranks+1)
913: . rmine - concatenated array holding local indices referencing each remote rank
914: - rremote - concatenated array holding remote indices referenced for each remote rank
916: Level: developer
918: .seealso: PetscSFGetLeafRanks()
919: @*/
920: PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
921: {
926: if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
927: if (sf->ops->GetRootRanks) {
928: (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);
929: } else {
930: /* The generic implementation */
931: if (nranks) *nranks = sf->nranks;
932: if (ranks) *ranks = sf->ranks;
933: if (roffset) *roffset = sf->roffset;
934: if (rmine) *rmine = sf->rmine;
935: if (rremote) *rremote = sf->rremote;
936: }
937: return(0);
938: }
940: /*@C
941: PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
943: Not Collective
945: Input Arguments:
946: . sf - star forest
948: Output Arguments:
949: + niranks - number of leaf ranks referencing roots on this process
950: . iranks - array of ranks
951: . ioffset - offset in irootloc for each rank (length niranks+1)
952: - irootloc - concatenated array holding local indices of roots referenced by each leaf rank
954: Level: developer
956: .seealso: PetscSFGetRootRanks()
957: @*/
958: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
959: {
964: if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
965: if (sf->ops->GetLeafRanks) {
966: (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);
967: } else {
968: PetscSFType type;
969: PetscSFGetType(sf,&type);
970: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
971: }
972: return(0);
973: }
975: static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
976: PetscInt i;
977: for (i=0; i<n; i++) {
978: if (needle == list[i]) return PETSC_TRUE;
979: }
980: return PETSC_FALSE;
981: }
983: /*@C
984: PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
986: Collective
988: Input Arguments:
989: + sf - PetscSF to set up; PetscSFSetGraph() must have been called
990: - dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)
992: Level: developer
994: .seealso: PetscSFGetRootRanks()
995: @*/
996: PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
997: {
998: PetscErrorCode ierr;
999: PetscTable table;
1000: PetscTablePosition pos;
1001: PetscMPIInt size,groupsize,*groupranks;
1002: PetscInt *rcount,*ranks;
1003: PetscInt i, irank = -1,orank = -1;
1007: PetscSFCheckGraphSet(sf,1);
1008: MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
1009: PetscTableCreate(10,size,&table);
1010: for (i=0; i<sf->nleaves; i++) {
1011: /* Log 1-based rank */
1012: PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);
1013: }
1014: PetscTableGetCount(table,&sf->nranks);
1015: PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);
1016: PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);
1017: PetscTableGetHeadPosition(table,&pos);
1018: for (i=0; i<sf->nranks; i++) {
1019: PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);
1020: ranks[i]--; /* Convert back to 0-based */
1021: }
1022: PetscTableDestroy(&table);
1024: /* We expect that dgroup is reliably "small" while nranks could be large */
1025: {
1026: MPI_Group group = MPI_GROUP_NULL;
1027: PetscMPIInt *dgroupranks;
1028: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1029: MPI_Group_size(dgroup,&groupsize);
1030: PetscMalloc1(groupsize,&dgroupranks);
1031: PetscMalloc1(groupsize,&groupranks);
1032: for (i=0; i<groupsize; i++) dgroupranks[i] = i;
1033: if (groupsize) {MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);}
1034: MPI_Group_free(&group);
1035: PetscFree(dgroupranks);
1036: }
1038: /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1039: for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
1040: for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
1041: if (InList(ranks[i],groupsize,groupranks)) break;
1042: }
1043: for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1044: if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
1045: }
1046: if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1047: PetscInt tmprank,tmpcount;
1049: tmprank = ranks[i];
1050: tmpcount = rcount[i];
1051: ranks[i] = ranks[sf->ndranks];
1052: rcount[i] = rcount[sf->ndranks];
1053: ranks[sf->ndranks] = tmprank;
1054: rcount[sf->ndranks] = tmpcount;
1055: sf->ndranks++;
1056: }
1057: }
1058: PetscFree(groupranks);
1059: PetscSortIntWithArray(sf->ndranks,ranks,rcount);
1060: PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);
1061: sf->roffset[0] = 0;
1062: for (i=0; i<sf->nranks; i++) {
1063: PetscMPIIntCast(ranks[i],sf->ranks+i);
1064: sf->roffset[i+1] = sf->roffset[i] + rcount[i];
1065: rcount[i] = 0;
1066: }
1067: for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
1068: /* short circuit */
1069: if (orank != sf->remote[i].rank) {
1070: /* Search for index of iremote[i].rank in sf->ranks */
1071: PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);
1072: if (irank < 0) {
1073: PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);
1074: if (irank >= 0) irank += sf->ndranks;
1075: }
1076: orank = sf->remote[i].rank;
1077: }
1078: if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
1079: sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i;
1080: sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1081: rcount[irank]++;
1082: }
1083: PetscFree2(rcount,ranks);
1084: return(0);
1085: }
1087: /*@C
1088: PetscSFGetGroups - gets incoming and outgoing process groups
1090: Collective
1092: Input Argument:
1093: . sf - star forest
1095: Output Arguments:
1096: + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1097: - outgoing - group of destination processes for outgoing edges (roots that I reference)
1099: Level: developer
1101: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
1102: @*/
1103: PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
1104: {
1106: MPI_Group group = MPI_GROUP_NULL;
1109: if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
1110: if (sf->ingroup == MPI_GROUP_NULL) {
1111: PetscInt i;
1112: const PetscInt *indegree;
1113: PetscMPIInt rank,*outranks,*inranks;
1114: PetscSFNode *remote;
1115: PetscSF bgcount;
1117: /* Compute the number of incoming ranks */
1118: PetscMalloc1(sf->nranks,&remote);
1119: for (i=0; i<sf->nranks; i++) {
1120: remote[i].rank = sf->ranks[i];
1121: remote[i].index = 0;
1122: }
1123: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);
1124: PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1125: PetscSFComputeDegreeBegin(bgcount,&indegree);
1126: PetscSFComputeDegreeEnd(bgcount,&indegree);
1127: /* Enumerate the incoming ranks */
1128: PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);
1129: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1130: for (i=0; i<sf->nranks; i++) outranks[i] = rank;
1131: PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);
1132: PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);
1133: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1134: MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);
1135: MPI_Group_free(&group);
1136: PetscFree2(inranks,outranks);
1137: PetscSFDestroy(&bgcount);
1138: }
1139: *incoming = sf->ingroup;
1141: if (sf->outgroup == MPI_GROUP_NULL) {
1142: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1143: MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);
1144: MPI_Group_free(&group);
1145: }
1146: *outgoing = sf->outgroup;
1147: return(0);
1148: }
1150: /*@
1151:    PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
1153: Collective
1155: Input Argument:
1156: . sf - star forest that may contain roots with 0 or with more than 1 vertex
1158: Output Arguments:
1159: . multi - star forest with split roots, such that each root has degree exactly 1
1161: Level: developer
1163: Notes:
1165: In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1166: directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1167: edge, it is a candidate for future optimization that might involve its removal.
1169: .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1170: @*/
1171: PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
1172: {
1178: if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1179: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1180: *multi = sf->multi;
1181: return(0);
1182: }
1183: if (!sf->multi) {
1184: const PetscInt *indegree;
1185: PetscInt i,*inoffset,*outones,*outoffset,maxlocal;
1186: PetscSFNode *remote;
1187: maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
1188: PetscSFComputeDegreeBegin(sf,&indegree);
1189: PetscSFComputeDegreeEnd(sf,&indegree);
1190: PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);
1191: inoffset[0] = 0;
1192: for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
1193: for (i=0; i<maxlocal; i++) outones[i] = 1;
1194: PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1195: PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1196: for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1197: if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
1198: for (i=0; i<sf->nroots; i++) {
1199: if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
1200: }
1201: }
1202: PetscMalloc1(sf->nleaves,&remote);
1203: for (i=0; i<sf->nleaves; i++) {
1204: remote[i].rank = sf->remote[i].rank;
1205: remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1206: }
1207: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1208: PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1209: if (sf->rankorder) { /* Sort the ranks */
1210: PetscMPIInt rank;
1211: PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
1212: PetscSFNode *newremote;
1213: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1214: for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
1215: PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);
1216: for (i=0; i<maxlocal; i++) outranks[i] = rank;
1217: PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);
1218: PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);
1219: /* Sort the incoming ranks at each vertex, build the inverse map */
1220: for (i=0; i<sf->nroots; i++) {
1221: PetscInt j;
1222: for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
1223: PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);
1224: for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1225: }
1226: PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);
1227: PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);
1228: PetscMalloc1(sf->nleaves,&newremote);
1229: for (i=0; i<sf->nleaves; i++) {
1230: newremote[i].rank = sf->remote[i].rank;
1231: newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1232: }
1233: PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);
1234: PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);
1235: }
1236: PetscFree3(inoffset,outones,outoffset);
1237: }
1238: *multi = sf->multi;
1239: return(0);
1240: }
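/*
   A sketch (illustrative only; multirootdata is a hypothetical array with one slot per incoming
   edge, laid out as by the multi-SF): reducing with MPIU_REPLACE on the multi-SF deposits every
   leaf contribution into its own multi-root slot instead of combining them, which is what
   PetscSFGatherBegin()/End() do internally.

     PetscSF multi;

     PetscSFGetMultiSF(sf,&multi);
     PetscSFReduceBegin(multi,MPIU_INT,leafdata,multirootdata,MPIU_REPLACE);
     PetscSFReduceEnd(multi,MPIU_INT,leafdata,multirootdata,MPIU_REPLACE);
*/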
1242: /*@C
1243: PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices
1245: Collective
1247: Input Arguments:
1248: + sf - original star forest
1249: . nselected - number of selected roots on this process
1250: - selected - indices of the selected roots on this process
1252: Output Arguments:
1253: . esf - new star forest
1255: Level: advanced
1257: Note:
1258: To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1259: be done by calling PetscSFGetGraph().
1261: .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1262: @*/
1263: PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1264: {
1265: PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1266: const PetscInt *ilocal;
1267: signed char *rootdata,*leafdata,*leafmem;
1268: const PetscSFNode *iremote;
1269: PetscSFNode *new_iremote;
1270: MPI_Comm comm;
1271: PetscErrorCode ierr;
1275: PetscSFCheckGraphSet(sf,1);
1279: PetscSFSetUp(sf);
1280: PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);
1281: PetscObjectGetComm((PetscObject)sf,&comm);
1282: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
1284: if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1285:     PetscBool dups;
1286:     PetscCheckDupsInt(nselected,selected,&dups); /* assumed: the standard duplicate check that sets dups, required before the test below */
1287:     if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1288: for (i=0; i<nselected; i++)
1289:       if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root index %D is out of [0,%D)",selected[i],nroots);
1290: }
1292: if (sf->ops->CreateEmbeddedSF) {
1293: (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);
1294: } else {
1295: /* A generic version of creating embedded sf */
1296: PetscSFGetLeafRange(sf,&minleaf,&maxleaf);
1297: maxlocal = maxleaf - minleaf + 1;
1298: PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);
1299: leafdata = leafmem - minleaf;
1300: /* Tag selected roots and bcast to leaves */
1301: for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1302: PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);
1303: PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);
1305: /* Build esf with leaves that are still connected */
1306: esf_nleaves = 0;
1307: for (i=0; i<nleaves; i++) {
1308: j = ilocal ? ilocal[i] : i;
1309: /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1310: with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1311: */
1312: esf_nleaves += (leafdata[j] ? 1 : 0);
1313: }
1314: PetscMalloc1(esf_nleaves,&new_ilocal);
1315: PetscMalloc1(esf_nleaves,&new_iremote);
1316: for (i=n=0; i<nleaves; i++) {
1317: j = ilocal ? ilocal[i] : i;
1318: if (leafdata[j]) {
1319: new_ilocal[n] = j;
1320: new_iremote[n].rank = iremote[i].rank;
1321: new_iremote[n].index = iremote[i].index;
1322: ++n;
1323: }
1324: }
1325: PetscSFCreate(comm,esf);
1326: PetscSFSetFromOptions(*esf);
1327: PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1328: PetscFree2(rootdata,leafmem);
1329: }
1330: PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);
1331: return(0);
1332: }
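/*
   A sketch (illustrative only; assumes this rank has at least 3 roots): keep only the edges that
   point at the locally selected roots 0 and 2, then communicate on the smaller SF. Leaf and root
   indices are not remapped.

     PetscInt selected[2] = {0,2};
     PetscSF  esf;

     PetscSFCreateEmbeddedSF(sf,2,selected,&esf);
     PetscSFBcastBegin(esf,MPIU_INT,rootdata,leafdata);
     PetscSFBcastEnd(esf,MPIU_INT,rootdata,leafdata);
     PetscSFDestroy(&esf);
*/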
1334: /*@C
1335: PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1337: Collective
1339: Input Arguments:
1340: + sf - original star forest
1341: . nselected - number of selected leaves on this process
1342: - selected - indices of the selected leaves on this process
1344: Output Arguments:
1345: . newsf - new star forest
1347: Level: advanced
1349: .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
1350: @*/
1351: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1352: {
1353: const PetscSFNode *iremote;
1354: PetscSFNode *new_iremote;
1355: const PetscInt *ilocal;
1356: PetscInt i,nroots,*leaves,*new_ilocal;
1357: MPI_Comm comm;
1358: PetscErrorCode ierr;
1362: PetscSFCheckGraphSet(sf,1);
1366:   /* Remove duplicates from selected[] and put the results in leaves[] */
1367: PetscObjectGetComm((PetscObject)sf,&comm);
1368: PetscMalloc1(nselected,&leaves);
1369: PetscArraycpy(leaves,selected,nselected);
1370: PetscSortedRemoveDupsInt(&nselected,leaves);
1371: if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);
1373: /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1374: if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1375: (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);
1376: } else {
1377: PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);
1378: PetscMalloc1(nselected,&new_ilocal);
1379: PetscMalloc1(nselected,&new_iremote);
1380: for (i=0; i<nselected; ++i) {
1381: const PetscInt l = leaves[i];
1382: new_ilocal[i] = ilocal ? ilocal[l] : l;
1383: new_iremote[i].rank = iremote[l].rank;
1384: new_iremote[i].index = iremote[l].index;
1385: }
1386: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);
1387: PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1388: }
1389: PetscFree(leaves);
1390: return(0);
1391: }
1393: /*@C
1394: PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()
1396: Collective on PetscSF
1398: Input Arguments:
1399: + sf - star forest on which to communicate
1400: . unit - data type associated with each node
1401: . rootdata - buffer to broadcast
1402: - op - operation to use for reduction
1404: Output Arguments:
1405: . leafdata - buffer to be reduced with values from each leaf's respective root
1407: Level: intermediate
1409: Notes:
1410:    When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1411:    are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1412:    use PetscSFBcastAndOpWithMemTypeBegin() instead.
1413: .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd(), PetscSFBcastAndOpWithMemTypeBegin()
1414: @*/
1415: PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1416: {
1418: PetscMemType rootmtype,leafmtype;
1422: PetscSFSetUp(sf);
1423: PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1424: PetscGetMemType(rootdata,&rootmtype);
1425: PetscGetMemType(leafdata,&leafmtype);
1426: (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1427: PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1428: return(0);
1429: }
1431: /*@C
1432: PetscSFBcastAndOpWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastAndOpEnd()
1434: Collective on PetscSF
1436: Input Arguments:
1437: + sf - star forest on which to communicate
1438: . unit - data type associated with each node
1439: . rootmtype - memory type of rootdata
1440: . rootdata - buffer to broadcast
1441: . leafmtype - memory type of leafdata
1442: - op - operation to use for reduction
1444: Output Arguments:
1445: . leafdata - buffer to be reduced with values from each leaf's respective root
1447: Level: intermediate
1449: .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd(),PetscSFBcastAndOpBegin()
1450: @*/
1451: PetscErrorCode PetscSFBcastAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
1452: {
1457: PetscSFSetUp(sf);
1458: PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1459: (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1460: PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1461: return(0);
1462: }
1464: /*@C
1465: PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()
1467: Collective
1469: Input Arguments:
1470: + sf - star forest
1471: . unit - data type
1472: . rootdata - buffer to broadcast
1473: - op - operation to use for reduction
1475: Output Arguments:
1476: . leafdata - buffer to be reduced with values from each leaf's respective root
1478: Level: intermediate
1480: .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1481: @*/
1482: PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1483: {
1488: PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);
1489: (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);
1490: PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);
1491: return(0);
1492: }
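/*
   A sketch (illustrative only): add each root value into the corresponding entries of leafdata,
   i.e. leafdata[leaf] += rootdata[root] for every edge. With MPIU_REPLACE the pair degenerates to
   a plain broadcast.

     PetscSFBcastAndOpBegin(sf,MPIU_INT,rootdata,leafdata,MPI_SUM);
     PetscSFBcastAndOpEnd(sf,MPIU_INT,rootdata,leafdata,MPI_SUM);
*/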
1494: /*@C
1495: PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1497: Collective
1499: Input Arguments:
1500: + sf - star forest
1501: . unit - data type
1502: . leafdata - values to reduce
1503: - op - reduction operation
1505: Output Arguments:
1506: . rootdata - result of reduction of values from all leaves of each root
1508: Level: intermediate
1510: Notes:
1511:    When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1512:    are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1513:    use PetscSFReduceWithMemTypeBegin() instead.
1515: .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
1516: @*/
1517: PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1518: {
1520: PetscMemType rootmtype,leafmtype;
1524: PetscSFSetUp(sf);
1525: PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);
1526: PetscGetMemType(rootdata,&rootmtype);
1527: PetscGetMemType(leafdata,&leafmtype);
1528: (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1529: PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);
1530: return(0);
1531: }
1533: /*@C
1534: PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()
1536: Collective
1538: Input Arguments:
1539: + sf - star forest
1540: . unit - data type
1541: . leafmtype - memory type of leafdata
1542: . leafdata - values to reduce
1543: . rootmtype - memory type of rootdata
1544: - op - reduction operation
1546: Output Arguments:
1547: . rootdata - result of reduction of values from all leaves of each root
1549: Level: intermediate
1551: .seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
1552: @*/
1553: PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1554: {
1559: PetscSFSetUp(sf);
1560: PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);
1561: (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1562: PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);
1563: return(0);
1564: }
1566: /*@C
1567: PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1569: Collective
1571: Input Arguments:
1572: + sf - star forest
1573: . unit - data type
1574: . leafdata - values to reduce
1575: - op - reduction operation
1577: Output Arguments:
1578: . rootdata - result of reduction of values from all leaves of each root
1580: Level: intermediate
1582: .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1583: @*/
1584: PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1585: {
1590: PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);
1591: (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);
1592: PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);
1593: return(0);
1594: }
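/*
   A sketch (illustrative only): sum the contributions of all leaves into their roots. The existing
   contents of rootdata take part in the reduction, so they are typically zeroed first.

     PetscArrayzero(rootdata,nroots);
     PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);
     PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);
*/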
1596: /*@C
1597: PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1599: Collective
1601: Input Arguments:
1602: + sf - star forest
1603: . unit - data type
1604: . leafdata - leaf values to use in reduction
1605: - op - operation to use for reduction
1607: Output Arguments:
1608: + rootdata - root values to be updated, input state is seen by first process to perform an update
1609: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1611: Level: advanced
1613: Note:
1614: The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1615: might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1616: not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1617: integers.
1619: .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1620: @*/
1621: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1622: {
1624: PetscMemType rootmtype,leafmtype,leafupdatemtype;
1628: PetscSFSetUp(sf);
1629: PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1630: PetscGetMemType(rootdata,&rootmtype);
1631: PetscGetMemType(leafdata,&leafmtype);
1632: PetscGetMemType(leafupdate,&leafupdatemtype);
1633: if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1634: (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);
1635: PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1636: return(0);
1637: }
1639: /*@C
1640: PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1642: Collective
1644: Input Arguments:
1645: + sf - star forest
1646: . unit - data type
1647: . leafdata - leaf values to use in reduction
1648: - op - operation to use for reduction
1650: Output Arguments:
1651: + rootdata - root values to be updated, input state is seen by first process to perform an update
1652: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1654: Level: advanced
1656: .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1657: @*/
1658: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1659: {
1664: PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1665: (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);
1666: PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1667: return(0);
1668: }
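/*
   Example (hedged sketch): use the fetch-and-op pair to hand out offsets, a common pattern for
   building disjoint ranges.  rootdata holds the starting offset of each root, leafdata holds the
   count each leaf claims (e.g. all ones), and leafupdate receives the root value seen just before
   this leaf's own addition.  All arrays are assumed to be allocated with matching sizes.

     PetscSFFetchAndOpBegin(sf,MPIU_INT,rootdata,leafdata,leafupdate,MPI_SUM);
     PetscSFFetchAndOpEnd(sf,MPIU_INT,rootdata,leafdata,leafupdate,MPI_SUM);
*/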
1670: /*@C
1671: PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()
1673: Collective
1675: Input Arguments:
1676: . sf - star forest
1678: Output Arguments:
1679: . degree - degree of each root vertex
1681: Level: advanced
1683: Notes:
1684: The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1686: .seealso: PetscSFGatherBegin()
1687: @*/
1688: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1689: {
1694: PetscSFCheckGraphSet(sf,1);
1696: if (!sf->degreeknown) {
1697: PetscInt i, nroots = sf->nroots, maxlocal;
1698: if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1699: maxlocal = sf->maxleaf-sf->minleaf+1;
1700: PetscMalloc1(nroots,&sf->degree);
1701: PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1702: for (i=0; i<nroots; i++) sf->degree[i] = 0;
1703: for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1704: PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1705: }
1706: *degree = NULL;
1707: return(0);
1708: }
1710: /*@C
1711: PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()
1713: Collective
1715: Input Arguments:
1716: . sf - star forest
1718: Output Arguments:
1719: . degree - degree of each root vertex
1721: Level: developer
1723: Notes:
1724: The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1726: .seealso: PetscSFComputeDegreeBegin()
1727: @*/
1728: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
1729: {
1734: PetscSFCheckGraphSet(sf,1);
1736: if (!sf->degreeknown) {
1737: if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1738: PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1739: PetscFree(sf->degreetmp);
1740: sf->degreeknown = PETSC_TRUE;
1741: }
1742: *degree = sf->degree;
1743: return(0);
1744: }
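/*
   Example (hedged sketch): count the number of leaves attached to each local root.  The degree
   array is owned by the SF, so it must not be freed by the caller.  nroots is assumed to have
   been obtained with PetscSFGetGraph().

     const PetscInt *degree;
     PetscInt        i,total = 0;
     PetscSFComputeDegreeBegin(sf,&degree);
     PetscSFComputeDegreeEnd(sf,&degree);
     for (i=0; i<nroots; i++) total += degree[i];
*/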
1747: /*@C
1748: PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1749: Each multi-root is assigned the index of the corresponding original root.
1751: Collective
1753: Input Arguments:
1754: + sf - star forest
1755: - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1757: Output Arguments:
1758: + nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1759: - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1761: Level: developer
1763: Notes:
1764: The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1766: .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1767: @*/
1768: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1769: {
1770: PetscSF msf;
1771: PetscInt i, j, k, nroots, nmroots;
1772: PetscErrorCode ierr;
1776: PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
1780: PetscSFGetMultiSF(sf,&msf);
1781: PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);
1782: PetscMalloc1(nmroots, multiRootsOrigNumbering);
1783: for (i=0,j=0,k=0; i<nroots; i++) {
1784: if (!degree[i]) continue;
1785: for (j=0; j<degree[i]; j++,k++) {
1786: (*multiRootsOrigNumbering)[k] = i;
1787: }
1788: }
1789: if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Sanity check failed: number of multi-roots does not match the sum of root degrees");
1790: if (nMultiRoots) *nMultiRoots = nmroots;
1791: return(0);
1792: }
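/*
   Example (hedged sketch): map every multi-root of the multi-SF back to the original root it
   belongs to.  The degree array must come from the PetscSFComputeDegreeBegin()/End() pair above,
   and the returned numbering array is owned by the caller.

     const PetscInt *degree;
     PetscInt        nmr,*morig;
     PetscSFComputeDegreeBegin(sf,&degree);
     PetscSFComputeDegreeEnd(sf,&degree);
     PetscSFComputeMultiRootOriginalNumbering(sf,degree,&nmr,&morig);
     PetscFree(morig);
*/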
1794: /*@C
1795: PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1797: Collective
1799: Input Arguments:
1800: + sf - star forest
1801: . unit - data type
1802: - leafdata - leaf data to gather to roots
1804: Output Argument:
1805: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1807: Level: intermediate
1809: .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1810: @*/
1811: PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1812: {
1814: PetscSF multi = NULL;
1818: PetscSFSetUp(sf);
1819: PetscSFGetMultiSF(sf,&multi);
1820: PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);
1821: return(0);
1822: }
1824: /*@C
1825: PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1827: Collective
1829: Input Arguments:
1830: + sf - star forest
1831: . unit - data type
1832: - leafdata - leaf data to gather to roots
1834: Output Argument:
1835: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1837: Level: intermediate
1839: .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1840: @*/
1841: PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1842: {
1844: PetscSF multi = NULL;
1848: PetscSFGetMultiSF(sf,&multi);
1849: PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);
1850: return(0);
1851: }
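/*
   Example (hedged sketch): gather one PetscScalar from every leaf into per-root buckets.  The
   multi-root buffer provides degree[i] slots for root i, so it is sized as the sum of the degrees.
   sf and leafdata are assumed to be set up elsewhere.

     const PetscInt *degree;
     PetscInt        i,nroots,nmulti = 0;
     PetscScalar    *multirootdata;
     PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
     PetscSFComputeDegreeBegin(sf,&degree);
     PetscSFComputeDegreeEnd(sf,&degree);
     for (i=0; i<nroots; i++) nmulti += degree[i];
     PetscMalloc1(nmulti,&multirootdata);
     PetscSFGatherBegin(sf,MPIU_SCALAR,leafdata,multirootdata);
     PetscSFGatherEnd(sf,MPIU_SCALAR,leafdata,multirootdata);
*/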
1853: /*@C
1854: PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1856: Collective
1858: Input Arguments:
1859: + sf - star forest
1860: . unit - data type
1861: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1863: Output Argument:
1864: . leafdata - leaf data to be updated with personal data from each respective root
1866: Level: intermediate
1868: .seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
1869: @*/
1870: PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1871: {
1873: PetscSF multi = NULL;
1877: PetscSFSetUp(sf);
1878: PetscSFGetMultiSF(sf,&multi);
1879: PetscSFBcastBegin(multi,unit,multirootdata,leafdata);
1880: return(0);
1881: }
1883: /*@C
1884: PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1886: Collective
1888: Input Arguments:
1889: + sf - star forest
1890: . unit - data type
1891: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1893: Output Argument:
1894: . leafdata - leaf data to be updated with personal data from each respective root
1896: Level: intermediate
1898: .seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd()
1899: @*/
1900: PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1901: {
1903: PetscSF multi = NULL;
1907: PetscSFGetMultiSF(sf,&multi);
1908: PetscSFBcastEnd(multi,unit,multirootdata,leafdata);
1909: return(0);
1910: }
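/*
   Example (hedged sketch): the reverse of the gather above.  multirootdata is laid out root by
   root with degree[i] entries per root, and each leaf receives the one unit destined for it.

     PetscSFScatterBegin(sf,MPIU_SCALAR,multirootdata,leafdata);
     PetscSFScatterEnd(sf,MPIU_SCALAR,multirootdata,leafdata);
*/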
1912: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1913: {
1914: PetscInt i, n, nleaves;
1915: const PetscInt *ilocal = NULL;
1916: PetscHSetI seen;
1917: PetscErrorCode ierr;
1920: if (PetscDefined(USE_DEBUG)) {
1921: PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);
1922: PetscHSetICreate(&seen);
1923: for (i = 0; i < nleaves; i++) {
1924: const PetscInt leaf = ilocal ? ilocal[i] : i;
1925: PetscHSetIAdd(seen,leaf);
1926: }
1927: PetscHSetIGetSize(seen,&n);
1928: if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1929: PetscHSetIDestroy(&seen);
1930: }
1931: return(0);
1932: }
1934: /*@
1935: PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view
1937: Input Parameters:
1938: + sfA - The first PetscSF
1939: - sfB - The second PetscSF
1941: Output Parameters:
1942: . sfBA - The composite SF
1944: Level: developer
1946: Notes:
1947: Currently, the two SFs must be defined on congruent communicators and they must be true star
1948: forests, i.e. the same leaf is not connected with different roots.
1950: sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
1951: a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
1952: nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1953: Bcast on sfA, then a Bcast on sfB, on connected nodes.
1955: .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
1956: @*/
1957: PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1958: {
1959: PetscErrorCode ierr;
1960: const PetscSFNode *remotePointsA,*remotePointsB;
1961: PetscSFNode *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
1962: const PetscInt *localPointsA,*localPointsB;
1963: PetscInt *localPointsBA;
1964: PetscInt i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
1965: PetscBool denseB;
1969: PetscSFCheckGraphSet(sfA,1);
1971: PetscSFCheckGraphSet(sfB,2);
1974: PetscSFCheckLeavesUnique_Private(sfA);
1975: PetscSFCheckLeavesUnique_Private(sfB);
1977: PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);
1978: PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);
1979: if (localPointsA) {
1980: PetscMalloc1(numRootsB,&reorderedRemotePointsA);
1981: for (i=0; i<numRootsB; i++) {
1982: reorderedRemotePointsA[i].rank = -1;
1983: reorderedRemotePointsA[i].index = -1;
1984: }
1985: for (i=0; i<numLeavesA; i++) {
1986: if (localPointsA[i] >= numRootsB) continue;
1987: reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
1988: }
1989: remotePointsA = reorderedRemotePointsA;
1990: }
1991: PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);
1992: PetscMalloc1(maxleaf-minleaf+1,&leafdataB);
1993: PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);
1994: PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);
1995: PetscFree(reorderedRemotePointsA);
1997: denseB = (PetscBool)!localPointsB;
1998: for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1999: if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
2000: else numLeavesBA++;
2001: }
2002: if (denseB) {
2003: localPointsBA = NULL;
2004: remotePointsBA = leafdataB;
2005: } else {
2006: PetscMalloc1(numLeavesBA,&localPointsBA);
2007: PetscMalloc1(numLeavesBA,&remotePointsBA);
2008: for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
2009: const PetscInt l = localPointsB ? localPointsB[i] : i;
2011: if (leafdataB[l-minleaf].rank == -1) continue;
2012: remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
2013: localPointsBA[numLeavesBA] = l;
2014: numLeavesBA++;
2015: }
2016: PetscFree(leafdataB);
2017: }
2018: PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
2019: PetscSFSetFromOptions(*sfBA);
2020: PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
2021: return(0);
2022: }
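/*
   Example (hedged sketch): compose two SFs so that one broadcast on sfBA has, on connected nodes,
   the same effect as a broadcast on sfA followed by a broadcast on sfB.  rootdataA is sized for
   sfA's roots and leafdataB for sfB's leaves; both are assumed to exist already.

     PetscSF sfBA;
     PetscSFCompose(sfA,sfB,&sfBA);
     PetscSFBcastBegin(sfBA,MPIU_SCALAR,rootdataA,leafdataB);
     PetscSFBcastEnd(sfBA,MPIU_SCALAR,rootdataA,leafdataB);
     PetscSFDestroy(&sfBA);
*/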
2024: /*@
2025: PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one
2027: Input Parameters:
2028: + sfA - The first PetscSF
2029: - sfB - The second PetscSF
2031: Output Parameters:
2032: . sfBA - The composite SF.
2034: Level: developer
2036: Notes:
2037: Currently, the two SFs must be defined on congruent communicators and they must be true star
2038: forests, i.e. the same leaf is not connected with different roots. Moreover, all roots of the
2039: second SF must have a degree of 1, i.e., no root has more than one leaf connected.
2041: sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
2042: a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
2043: roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
2044: a Reduce on sfB, on connected roots.
2046: .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
2047: @*/
2048: PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
2049: {
2050: PetscErrorCode ierr;
2051: const PetscSFNode *remotePointsA,*remotePointsB;
2052: PetscSFNode *remotePointsBA;
2053: const PetscInt *localPointsA,*localPointsB;
2054: PetscSFNode *reorderedRemotePointsA = NULL;
2055: PetscInt i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
2056: MPI_Op op;
2057: #if defined(PETSC_USE_64BIT_INDICES)
2058: PetscBool iswin;
2059: #endif
2063: PetscSFCheckGraphSet(sfA,1);
2065: PetscSFCheckGraphSet(sfB,2);
2068: PetscSFCheckLeavesUnique_Private(sfA);
2069: PetscSFCheckLeavesUnique_Private(sfB);
2071: PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);
2072: PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);
2074: /* TODO: Check roots of sfB have degree of 1 */
2075: /* Once that check is implemented, MPI_MAXLOC can be replaced with MPIU_REPLACE,
2076: since the two have the same effect when the root condition holds.
2077: We use MPI_MAXLOC only to get a deterministic output from this routine if
2078: the root condition is not met.
2079: */
2080: op = MPI_MAXLOC;
2081: #if defined(PETSC_USE_64BIT_INDICES)
2082: /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2083: PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);
2084: if (iswin) op = MPIU_REPLACE;
2085: #endif
2087: PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);
2088: PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);
2089: for (i=0; i<maxleaf - minleaf + 1; i++) {
2090: reorderedRemotePointsA[i].rank = -1;
2091: reorderedRemotePointsA[i].index = -1;
2092: }
2093: if (localPointsA) {
2094: for (i=0; i<numLeavesA; i++) {
2095: if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2096: reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2097: }
2098: } else {
2099: for (i=0; i<numLeavesA; i++) {
2100: if (i > maxleaf || i < minleaf) continue;
2101: reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2102: }
2103: }
2105: PetscMalloc1(numRootsB,&localPointsBA);
2106: PetscMalloc1(numRootsB,&remotePointsBA);
2107: for (i=0; i<numRootsB; i++) {
2108: remotePointsBA[i].rank = -1;
2109: remotePointsBA[i].index = -1;
2110: }
2112: PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
2113: PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
2114: PetscFree(reorderedRemotePointsA);
2115: for (i=0,numLeavesBA=0; i<numRootsB; i++) {
2116: if (remotePointsBA[i].rank == -1) continue;
2117: remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
2118: remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2119: localPointsBA[numLeavesBA] = i;
2120: numLeavesBA++;
2121: }
2122: PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
2123: PetscSFSetFromOptions(*sfBA);
2124: PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
2125: return(0);
2126: }
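/*
   Example (hedged sketch): compose sfA with the inverse of sfB.  On connected roots, a broadcast
   on sfBA is equivalent to a broadcast on sfA followed by a reduction on sfB, so the result lands
   in a buffer sized for sfB's roots.  rootdataA and rootdataB are assumed to exist already.

     PetscSF sfBA;
     PetscSFComposeInverse(sfA,sfB,&sfBA);
     PetscSFBcastBegin(sfBA,MPIU_SCALAR,rootdataA,rootdataB);
     PetscSFBcastEnd(sfBA,MPIU_SCALAR,rootdataA,rootdataB);
     PetscSFDestroy(&sfBA);
*/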
2128: /*
2129: PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
2131: Input Parameters:
2132: . sf - The global PetscSF
2134: Output Parameters:
2135: . out - The local PetscSF
2136: */
2137: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
2138: {
2139: MPI_Comm comm;
2140: PetscMPIInt myrank;
2141: const PetscInt *ilocal;
2142: const PetscSFNode *iremote;
2143: PetscInt i,j,nroots,nleaves,lnleaves,*lilocal;
2144: PetscSFNode *liremote;
2145: PetscSF lsf;
2146: PetscErrorCode ierr;
2150: if (sf->ops->CreateLocalSF) {
2151: (*sf->ops->CreateLocalSF)(sf,out);
2152: } else {
2153: /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2154: PetscObjectGetComm((PetscObject)sf,&comm);
2155: MPI_Comm_rank(comm,&myrank);
2157: /* Find out local edges and build a local SF */
2158: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
2159: for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2160: PetscMalloc1(lnleaves,&lilocal);
2161: PetscMalloc1(lnleaves,&liremote);
2163: for (i=j=0; i<nleaves; i++) {
2164: if (iremote[i].rank == (PetscInt)myrank) {
2165: lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2166: liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
2167: liremote[j].index = iremote[i].index;
2168: j++;
2169: }
2170: }
2171: PetscSFCreate(PETSC_COMM_SELF,&lsf);
2172: PetscSFSetFromOptions(lsf);
2173: PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);
2174: PetscSFSetUp(lsf);
2175: *out = lsf;
2176: }
2177: return(0);
2178: }
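/*
   Example (hedged sketch, internal use): the local SF built here communicates only along
   intra-process edges, e.g. a broadcast restricted to on-rank leaves.

     PetscSF lsf;
     PetscSFCreateLocalSF_Private(sf,&lsf);
     PetscSFBcastBegin(lsf,MPIU_SCALAR,rootdata,leafdata);
     PetscSFBcastEnd(lsf,MPIU_SCALAR,rootdata,leafdata);
     PetscSFDestroy(&lsf);
*/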
2180: /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2181: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2182: {
2183: PetscErrorCode ierr;
2184: PetscMemType rootmtype,leafmtype;
2188: PetscSFSetUp(sf);
2189: PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);
2190: PetscGetMemType(rootdata,&rootmtype);
2191: PetscGetMemType(leafdata,&leafmtype);
2192: if (sf->ops->BcastToZero) {
2193: (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);
2194: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2195: PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);
2196: return(0);
2197: }