Actual source code: sfimpl.h
petsc-3.12.5 2020-03-29
#if !defined(PETSCSFIMPL_H)
#define PETSCSFIMPL_H

#include <petscsf.h>
#include <petsc/private/petscimpl.h>
#include <petscviewer.h>

#if defined(PETSC_HAVE_CUDA)
#include <cuda_runtime.h>
#endif
PETSC_EXTERN PetscLogEvent PETSCSF_SetGraph;
PETSC_EXTERN PetscLogEvent PETSCSF_SetUp;
PETSC_EXTERN PetscLogEvent PETSCSF_BcastBegin;
PETSC_EXTERN PetscLogEvent PETSCSF_BcastEnd;
PETSC_EXTERN PetscLogEvent PETSCSF_BcastAndOpBegin;
PETSC_EXTERN PetscLogEvent PETSCSF_BcastAndOpEnd;
PETSC_EXTERN PetscLogEvent PETSCSF_ReduceBegin;
PETSC_EXTERN PetscLogEvent PETSCSF_ReduceEnd;
PETSC_EXTERN PetscLogEvent PETSCSF_FetchAndOpBegin;
PETSC_EXTERN PetscLogEvent PETSCSF_FetchAndOpEnd;
PETSC_EXTERN PetscLogEvent PETSCSF_EmbedSF;
PETSC_EXTERN PetscLogEvent PETSCSF_DistSect;
PETSC_EXTERN PetscLogEvent PETSCSF_SectSF;
PETSC_EXTERN PetscLogEvent PETSCSF_RemoteOff;

typedef enum {PETSC_MEMTYPE_HOST=0, PETSC_MEMTYPE_DEVICE} PetscMemType;
struct _PetscSFOps {
  PetscErrorCode (*Reset)(PetscSF);
  PetscErrorCode (*Destroy)(PetscSF);
  PetscErrorCode (*SetUp)(PetscSF);
  PetscErrorCode (*SetFromOptions)(PetscOptionItems*,PetscSF);
  PetscErrorCode (*View)(PetscSF,PetscViewer);
  PetscErrorCode (*Duplicate)(PetscSF,PetscSFDuplicateOption,PetscSF);
  PetscErrorCode (*BcastAndOpBegin)(PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,void*,MPI_Op);
  PetscErrorCode (*BcastAndOpEnd)(PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,void*,MPI_Op);
  PetscErrorCode (*ReduceBegin)(PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,void*,MPI_Op);
  PetscErrorCode (*ReduceEnd)(PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,void*,MPI_Op);
  PetscErrorCode (*FetchAndOpBegin)(PetscSF,MPI_Datatype,PetscMemType,void*,PetscMemType,const void*,void*,MPI_Op);
  PetscErrorCode (*FetchAndOpEnd)(PetscSF,MPI_Datatype,PetscMemType,void*,PetscMemType,const void*,void*,MPI_Op);
  PetscErrorCode (*BcastToZero)(PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,void*); /* For internal use only */
  PetscErrorCode (*GetRootRanks)(PetscSF,PetscInt*,const PetscMPIInt**,const PetscInt**,const PetscInt**,const PetscInt**);
  PetscErrorCode (*GetLeafRanks)(PetscSF,PetscInt*,const PetscMPIInt**,const PetscInt**,const PetscInt**);
  PetscErrorCode (*CreateLocalSF)(PetscSF,PetscSF*);
  PetscErrorCode (*GetGraph)(PetscSF,PetscInt*,PetscInt*,const PetscInt**,const PetscSFNode**);
  PetscErrorCode (*CreateEmbeddedSF)(PetscSF,PetscInt,const PetscInt*,PetscSF*);
  PetscErrorCode (*CreateEmbeddedLeafSF)(PetscSF,PetscInt,const PetscInt*,PetscSF*);
};
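
/* A PetscSF type implementation fills this ops table in its creation routine. A minimal,
   hypothetical sketch (PetscSFCreate_MyImpl and the *_MyImpl callbacks are illustrative names,
   not PETSc symbols; a real implementation would also set sf->data and the remaining pointers):

     PETSC_EXTERN PetscErrorCode PetscSFCreate_MyImpl(PetscSF sf)
     {
       sf->ops->SetUp           = PetscSFSetUp_MyImpl;
       sf->ops->Reset           = PetscSFReset_MyImpl;
       sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_MyImpl;
       sf->ops->BcastAndOpEnd   = PetscSFBcastAndOpEnd_MyImpl;
       return(0);
     }
*/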
typedef struct _n_PetscSFPackOpt *PetscSFPackOpt;

struct _p_PetscSF {
  PETSCHEADER(struct _PetscSFOps);
  PetscInt       nroots;          /* Number of root vertices on current process (candidates for incoming edges) */
  PetscInt       nleaves;         /* Number of leaf vertices on current process (this process specifies a root for each leaf) */
  PetscInt       *mine;           /* Location of leaves in leafdata arrays provided to the communication routines */
  PetscInt       *mine_alloc;
  PetscInt       minleaf,maxleaf;
  PetscSFNode    *remote;         /* Remote references to roots for each local leaf */
  PetscSFNode    *remote_alloc;
  PetscInt       nranks;          /* Number of ranks owning roots connected to my leaves */
  PetscInt       ndranks;         /* Number of ranks in distinguished group holding roots connected to my leaves */
  PetscMPIInt    *ranks;          /* List of ranks referenced by "remote" */
  PetscInt       *roffset;        /* Array of length nranks+1, offset in rmine/rremote for each rank */
  PetscInt       *rmine;          /* Concatenated array holding local indices referencing each remote rank */
  PetscInt       *rremote;        /* Concatenated array holding remote indices referenced for each remote rank */
  PetscBool      degreeknown;     /* The degree is currently known, do not have to recompute */
  PetscInt       *degree;         /* Degree of each of my root vertices */
  PetscInt       *degreetmp;      /* Temporary local array for computing degree */
  PetscBool      rankorder;       /* Sort ranks for gather and scatter operations */
  MPI_Group      ingroup;         /* Group of processes connected to my roots */
  MPI_Group      outgroup;        /* Group of processes connected to my leaves */
  PetscSF        multi;           /* Internal graph used to implement gather and scatter operations */
  PetscBool      graphset;        /* Flag indicating that the graph has been set, required before calling communication routines */
  PetscBool      setupcalled;     /* Type and communication structures have been set up */
  PetscSFPackOpt leafpackopt;     /* Optimization plans to (un)pack leaves connected to remote roots, based on index patterns in rmine[]. NULL for no optimization */
  PetscSFPackOpt selfleafpackopt; /* Optimization plans to (un)pack leaves connected to local roots */
  PetscBool      selfleafdups;    /* Indices of leaves in rmine[0,roffset[ndranks]) have duplicates, so threads working on these leaves in parallel may race */
  PetscBool      remoteleafdups;  /* Indices of leaves in rmine[roffset[ndranks],roffset[nranks]) have duplicates */

  PetscSFPattern pattern;         /* Pattern of the graph */
  PetscLayout    map;             /* Layout of leaves over all processes when building a patterned graph */
#if defined(PETSC_HAVE_CUDA)
  PetscInt       *rmine_d;        /* A copy of rmine in device memory */
  PetscInt       MAX_CORESIDENT_THREADS;
#endif
  void           *data;           /* Pointer to implementation */
};
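
/* The graph fields above (nroots, nleaves, mine, remote) are normally filled through
   PetscSFSetGraph(). A minimal usage sketch in which every process owns one root and has one
   leaf referencing root 0 on rank 0 (values are illustrative only; comm is assumed to be an MPI
   communicator in the calling scope, and error checking with CHKERRQ is omitted for brevity).
   Passing ilocal=NULL means the leaves are numbered contiguously from 0; the iremote entries
   become the remote[] array above, and setup derives the rank-oriented arrays
   (ranks, roffset, rmine, rremote):

     PetscSF     sf;
     PetscSFNode iremote;
     iremote.rank  = 0;
     iremote.index = 0;
     PetscSFCreate(comm,&sf);
     PetscSFSetGraph(sf,1,1,NULL,PETSC_COPY_VALUES,&iremote,PETSC_COPY_VALUES);
     PetscSFSetUp(sf);
     PetscSFDestroy(&sf);
*/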
PETSC_EXTERN PetscBool PetscSFRegisterAllCalled;
PETSC_EXTERN PetscErrorCode PetscSFRegisterAll(void);

PETSC_INTERN PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF,PetscSF*);
PETSC_INTERN PetscErrorCode PetscSFBcastToZero_Private(PetscSF,MPI_Datatype,const void*,void*);

PETSC_EXTERN PetscErrorCode MPIPetsc_Type_unwrap(MPI_Datatype,MPI_Datatype*,PetscBool*);
PETSC_EXTERN PetscErrorCode MPIPetsc_Type_compare(MPI_Datatype,MPI_Datatype,PetscBool*);
PETSC_EXTERN PetscErrorCode MPIPetsc_Type_compare_contig(MPI_Datatype,MPI_Datatype,PetscInt*);
#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
#define MPIU_Iscatter(a,b,c,d,e,f,g,h,req)    MPI_Iscatter(a,b,c,d,e,f,g,h,req)
#define MPIU_Iscatterv(a,b,c,d,e,f,g,h,i,req) MPI_Iscatterv(a,b,c,d,e,f,g,h,i,req)
#define MPIU_Igather(a,b,c,d,e,f,g,h,req)     MPI_Igather(a,b,c,d,e,f,g,h,req)
#define MPIU_Igatherv(a,b,c,d,e,f,g,h,i,req)  MPI_Igatherv(a,b,c,d,e,f,g,h,i,req)
#define MPIU_Iallgather(a,b,c,d,e,f,g,req)    MPI_Iallgather(a,b,c,d,e,f,g,req)
#define MPIU_Iallgatherv(a,b,c,d,e,f,g,h,req) MPI_Iallgatherv(a,b,c,d,e,f,g,h,req)
#define MPIU_Ialltoall(a,b,c,d,e,f,g,req)     MPI_Ialltoall(a,b,c,d,e,f,g,req)
#else
/* Ignore req, the MPI_Request argument, and fall back to blocking MPI collectives. Callers should
   initialize req to MPI_REQUEST_NULL so that MPI_Wait(req,status) can be called regardless of
   whether the underlying collective is blocking or nonblocking.
*/
#define MPIU_Iscatter(a,b,c,d,e,f,g,h,req)    MPI_Scatter(a,b,c,d,e,f,g,h)
#define MPIU_Iscatterv(a,b,c,d,e,f,g,h,i,req) MPI_Scatterv(a,b,c,d,e,f,g,h,i)
#define MPIU_Igather(a,b,c,d,e,f,g,h,req)     MPI_Gather(a,b,c,d,e,f,g,h)
#define MPIU_Igatherv(a,b,c,d,e,f,g,h,i,req)  MPI_Gatherv(a,b,c,d,e,f,g,h,i)
#define MPIU_Iallgather(a,b,c,d,e,f,g,req)    MPI_Allgather(a,b,c,d,e,f,g)
#define MPIU_Iallgatherv(a,b,c,d,e,f,g,h,req) MPI_Allgatherv(a,b,c,d,e,f,g,h)
#define MPIU_Ialltoall(a,b,c,d,e,f,g,req)     MPI_Alltoall(a,b,c,d,e,f,g)
#endif
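
/* Usage sketch for the wrappers above (illustrative only; comm is assumed to be an MPI
   communicator in the calling scope, error checking omitted for brevity): initialize req to
   MPI_REQUEST_NULL so the same code works whether the macro expands to the nonblocking
   collective or the blocking fallback; MPI_Wait() on MPI_REQUEST_NULL is a no-op.

     PetscMPIInt size,rank;
     PetscInt    myval,*gathered;
     MPI_Request req = MPI_REQUEST_NULL;
     MPI_Comm_size(comm,&size);
     MPI_Comm_rank(comm,&rank);
     myval = (PetscInt)rank;
     PetscMalloc1(size,&gathered);
     MPIU_Iallgather(&myval,1,MPIU_INT,gathered,1,MPIU_INT,comm,&req);
     MPI_Wait(&req,MPI_STATUS_IGNORE);
     PetscFree(gathered);
*/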
PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *mtype)
{
  *mtype = PETSC_MEMTYPE_HOST;
#if defined(PETSC_HAVE_CUDA)
  if (use_gpu_aware_mpi) {
    struct cudaPointerAttributes attr;
    if (data) {
#if (CUDART_VERSION < 10000)
      attr.memoryType = cudaMemoryTypeHost;
      cudaPointerGetAttributes(&attr,data);
      cudaGetLastError();
      if (attr.memoryType == cudaMemoryTypeDevice) *mtype = PETSC_MEMTYPE_DEVICE;
#else
      attr.type = cudaMemoryTypeHost;
      cudaPointerGetAttributes(&attr,data); /* Do not check the return code: before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */
      cudaGetLastError();                   /* Get and thereby clear the last error */
      if (attr.type == cudaMemoryTypeDevice || attr.type == cudaMemoryTypeManaged) *mtype = PETSC_MEMTYPE_DEVICE;
#endif
    }
  }
#endif
  return(0);
}
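
/* Usage sketch (illustrative only; rootdata is a hypothetical caller-provided buffer): classify a
   pointer before choosing a host or device pack/unpack path. Without CUDA, or when
   use_gpu_aware_mpi is false, the result is always PETSC_MEMTYPE_HOST.

     PetscMemType mtype;
     PetscGetMemType(rootdata,&mtype);
     if (mtype == PETSC_MEMTYPE_DEVICE) { ... pack on the device ... }
     else                               { ... pack on the host ... }
*/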
PETSC_STATIC_INLINE PetscErrorCode PetscMallocWithMemType(PetscMemType mtype,size_t size,void** ptr)
{
  if (mtype == PETSC_MEMTYPE_HOST) {PetscErrorCode ierr = PetscMalloc(size,ptr);CHKERRQ(ierr);}
#if defined(PETSC_HAVE_CUDA)
  else if (mtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaMalloc(ptr,size);CHKERRCUDA(err);}
#endif
  else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong PetscMemType %d",(int)mtype);
  return(0);
}
PETSC_STATIC_INLINE PetscErrorCode PetscFreeWithMemType_Private(PetscMemType mtype,void* ptr)
{
  if (mtype == PETSC_MEMTYPE_HOST) {PetscErrorCode ierr = PetscFree(ptr);CHKERRQ(ierr);}
#if defined(PETSC_HAVE_CUDA)
  else if (mtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaFree(ptr);CHKERRCUDA(err);}
#endif
  else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong PetscMemType %d",(int)mtype);
  return(0);
}

/* Free the memory and set ptr to NULL on success */
#define PetscFreeWithMemType(t,p) ((p) && (PetscFreeWithMemType_Private((t),(p)) || ((p)=0,0)))
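
/* The macro above calls the private routine only when p is non-NULL; on success the comma
   expression ((p)=0,0) zeroes p and the whole expression evaluates to 0, while a failure leaves
   p unchanged and yields a nonzero value. A usage sketch (buf is a hypothetical device pointer
   obtained from PetscMallocWithMemType()):

     PetscErrorCode ierr;
     ierr = PetscFreeWithMemType(PETSC_MEMTYPE_DEVICE,buf);CHKERRQ(ierr);
*/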
PETSC_STATIC_INLINE PetscErrorCode PetscMemcpyWithMemType(PetscMemType dstmtype,PetscMemType srcmtype,void* dst,const void*src,size_t n)
{
  if (n) {
    if (dstmtype == PETSC_MEMTYPE_HOST && srcmtype == PETSC_MEMTYPE_HOST) {PetscErrorCode ierr = PetscMemcpy(dst,src,n);CHKERRQ(ierr);}
#if defined(PETSC_HAVE_CUDA)
    else if (dstmtype == PETSC_MEMTYPE_DEVICE && srcmtype == PETSC_MEMTYPE_HOST)   {cudaError_t err = cudaMemcpy(dst,src,n,cudaMemcpyHostToDevice);CHKERRCUDA(err);}
    else if (dstmtype == PETSC_MEMTYPE_HOST   && srcmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaMemcpy(dst,src,n,cudaMemcpyDeviceToHost);CHKERRCUDA(err);}
    else if (dstmtype == PETSC_MEMTYPE_DEVICE && srcmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaMemcpy(dst,src,n,cudaMemcpyDeviceToDevice);CHKERRCUDA(err);}
#endif
    else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong PetscMemType for dst %d and src %d",(int)dstmtype,(int)srcmtype);
  }
  return(0);
}
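
/* Combined usage sketch for the memory helpers above (illustrative only; assumes a CUDA build so
   that PETSC_MEMTYPE_DEVICE is backed by cudaMalloc/cudaMemcpy/cudaFree; error checking with
   CHKERRQ is omitted for brevity): allocate a device buffer, upload host data, then free it.

     PetscInt    n = 128;
     PetscScalar *hostbuf,*devbuf;
     PetscMalloc1(n,&hostbuf);
     PetscMallocWithMemType(PETSC_MEMTYPE_DEVICE,n*sizeof(PetscScalar),(void**)&devbuf);
     PetscMemcpyWithMemType(PETSC_MEMTYPE_DEVICE,PETSC_MEMTYPE_HOST,devbuf,hostbuf,n*sizeof(PetscScalar));
     PetscFreeWithMemType(PETSC_MEMTYPE_DEVICE,devbuf);
     PetscFree(hostbuf);
*/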
#endif