Actual source code: sfimpl.h

petsc-3.12.5 2020-03-29
#if !defined(PETSCSFIMPL_H)
#define PETSCSFIMPL_H

#include <petscsf.h>
#include <petsc/private/petscimpl.h>
#include <petscviewer.h>

#if defined(PETSC_HAVE_CUDA)
#include <cuda_runtime.h>
#endif

PETSC_EXTERN PetscLogEvent PETSCSF_SetGraph;
PETSC_EXTERN PetscLogEvent PETSCSF_SetUp;
PETSC_EXTERN PetscLogEvent PETSCSF_BcastBegin;
PETSC_EXTERN PetscLogEvent PETSCSF_BcastEnd;
PETSC_EXTERN PetscLogEvent PETSCSF_BcastAndOpBegin;
PETSC_EXTERN PetscLogEvent PETSCSF_BcastAndOpEnd;
PETSC_EXTERN PetscLogEvent PETSCSF_ReduceBegin;
PETSC_EXTERN PetscLogEvent PETSCSF_ReduceEnd;
PETSC_EXTERN PetscLogEvent PETSCSF_FetchAndOpBegin;
PETSC_EXTERN PetscLogEvent PETSCSF_FetchAndOpEnd;
PETSC_EXTERN PetscLogEvent PETSCSF_EmbedSF;
PETSC_EXTERN PetscLogEvent PETSCSF_DistSect;
PETSC_EXTERN PetscLogEvent PETSCSF_SectSF;
PETSC_EXTERN PetscLogEvent PETSCSF_RemoteOff;

typedef enum {PETSC_MEMTYPE_HOST=0, PETSC_MEMTYPE_DEVICE} PetscMemType;

struct _PetscSFOps {
  PetscErrorCode (*Reset)(PetscSF);
  PetscErrorCode (*Destroy)(PetscSF);
  PetscErrorCode (*SetUp)(PetscSF);
  PetscErrorCode (*SetFromOptions)(PetscOptionItems*,PetscSF);
  PetscErrorCode (*View)(PetscSF,PetscViewer);
  PetscErrorCode (*Duplicate)(PetscSF,PetscSFDuplicateOption,PetscSF);
  PetscErrorCode (*BcastAndOpBegin)(PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,      void*,      MPI_Op);
  PetscErrorCode (*BcastAndOpEnd)  (PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,      void*,      MPI_Op);
  PetscErrorCode (*ReduceBegin)    (PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,      void*,      MPI_Op);
  PetscErrorCode (*ReduceEnd)      (PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,      void*,      MPI_Op);
  PetscErrorCode (*FetchAndOpBegin)(PetscSF,MPI_Datatype,PetscMemType,      void*,PetscMemType,const void*,void*,MPI_Op);
  PetscErrorCode (*FetchAndOpEnd)  (PetscSF,MPI_Datatype,PetscMemType,      void*,PetscMemType,const void*,void*,MPI_Op);
  PetscErrorCode (*BcastToZero)    (PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,      void*); /* For internal use only */
  PetscErrorCode (*GetRootRanks)(PetscSF,PetscInt*,const PetscMPIInt**,const PetscInt**,const PetscInt**,const PetscInt**);
  PetscErrorCode (*GetLeafRanks)(PetscSF,PetscInt*,const PetscMPIInt**,const PetscInt**,const PetscInt**);
  PetscErrorCode (*CreateLocalSF)(PetscSF,PetscSF*);
  PetscErrorCode (*GetGraph)(PetscSF,PetscInt*,PetscInt*,const PetscInt**,const PetscSFNode**);
  PetscErrorCode (*CreateEmbeddedSF)(PetscSF,PetscInt,const PetscInt*,PetscSF*);
  PetscErrorCode (*CreateEmbeddedLeafSF)(PetscSF,PetscInt,const PetscInt*,PetscSF*);
};
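
Each PetscSF implementation fills in this dispatch table when its constructor runs. The following is only a sketch of that pattern; the "Mine" type name and the functions assigned below are hypothetical (not part of PETSc) and assumed to be defined elsewhere.

/* Hypothetical constructor, for illustration of how an implementation populates sf->ops. */
PETSC_INTERN PetscErrorCode PetscSFCreate_Mine(PetscSF sf)
{
  PetscFunctionBegin;
  sf->ops->SetUp           = PetscSFSetUp_Mine;
  sf->ops->Reset           = PetscSFReset_Mine;
  sf->ops->Destroy         = PetscSFDestroy_Mine;
  sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Mine;
  sf->ops->BcastAndOpEnd   = PetscSFBcastAndOpEnd_Mine;
  /* ReduceBegin/End, FetchAndOpBegin/End, etc. would be assigned the same way; entries left
     unassigned remain NULL and must not be reached by the generic code. */
  sf->data = NULL; /* an implementation-specific context would normally be allocated and stored here */
  PetscFunctionReturn(0);
}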

typedef struct _n_PetscSFPackOpt *PetscSFPackOpt;

struct _p_PetscSF {
  PETSCHEADER(struct _PetscSFOps);
  PetscInt        nroots;          /* Number of root vertices on current process (candidates for incoming edges) */
  PetscInt        nleaves;         /* Number of leaf vertices on current process (this process specifies a root for each leaf) */
  PetscInt        *mine;           /* Location of leaves in leafdata arrays provided to the communication routines */
  PetscInt        *mine_alloc;
  PetscInt        minleaf,maxleaf;
  PetscSFNode     *remote;         /* Remote references to roots for each local leaf */
  PetscSFNode     *remote_alloc;
  PetscInt        nranks;          /* Number of ranks owning roots connected to my leaves */
  PetscInt        ndranks;         /* Number of ranks in distinguished group holding roots connected to my leaves */
  PetscMPIInt     *ranks;          /* List of ranks referenced by "remote" */
  PetscInt        *roffset;        /* Array of length nranks+1, offset in rmine/rremote for each rank */
  PetscInt        *rmine;          /* Concatenated array holding local indices referencing each remote rank */
  PetscInt        *rremote;        /* Concatenated array holding remote indices referenced for each remote rank */
  PetscBool       degreeknown;     /* The degree is currently known; no need to recompute */
  PetscInt        *degree;         /* Degree of each of my root vertices */
  PetscInt        *degreetmp;      /* Temporary local array for computing degree */
  PetscBool       rankorder;       /* Sort ranks for gather and scatter operations */
  MPI_Group       ingroup;         /* Group of processes connected to my roots */
  MPI_Group       outgroup;        /* Group of processes connected to my leaves */
  PetscSF         multi;           /* Internal graph used to implement gather and scatter operations */
  PetscBool       graphset;        /* Flag indicating that the graph has been set, required before calling communication routines */
  PetscBool       setupcalled;     /* Type and communication structures have been set up */
  PetscSFPackOpt  leafpackopt;     /* Optimization plans to (un)pack leaves connected to remote roots, based on index patterns in rmine[]. NULL for no optimization */
  PetscSFPackOpt  selfleafpackopt; /* Optimization plans to (un)pack leaves connected to local roots */
  PetscBool       selfleafdups;    /* Indices of leaves in rmine[0,roffset[ndranks]) have duplicates, so threads */
                                   /* working on these leaves in parallel may have a data race. */
  PetscBool       remoteleafdups;  /* Indices of leaves in rmine[roffset[ndranks],roffset[nranks]) have duplicates */

  PetscSFPattern  pattern;         /* Pattern of the graph */
  PetscLayout     map;             /* Layout of leaves over all processes when building a patterned graph */
#if defined(PETSC_HAVE_CUDA)
  PetscInt        *rmine_d;        /* A copy of rmine in device memory */
  PetscInt        MAX_CORESIDENT_THREADS;
#endif
  void *data;                      /* Pointer to implementation */
};
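
The graph fields above (nroots, nleaves, mine, remote) are normally filled through the public interface rather than set directly. As a brief orientation, a minimal usage sketch in which every rank attaches a single leaf to root 0 on rank 0 and then broadcasts one PetscInt (variable names are illustrative):

  PetscSF        sf;
  PetscSFNode    remote;
  PetscInt       rootdata = 42,leafdata = 0;
  PetscErrorCode ierr;

  ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr);
  remote.rank  = 0;                 /* owning rank of the referenced root */
  remote.index = 0;                 /* root index on that rank */
  ierr = PetscSFSetGraph(sf,1,1,NULL,PETSC_COPY_VALUES,&remote,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,&rootdata,&leafdata);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,&rootdata,&leafdata);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);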

PETSC_EXTERN PetscBool PetscSFRegisterAllCalled;
PETSC_EXTERN PetscErrorCode PetscSFRegisterAll(void);

PETSC_INTERN PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF,PetscSF*);
PETSC_INTERN PetscErrorCode PetscSFBcastToZero_Private(PetscSF,MPI_Datatype,const void*,void*);

PETSC_EXTERN PetscErrorCode MPIPetsc_Type_unwrap(MPI_Datatype,MPI_Datatype*,PetscBool*);
PETSC_EXTERN PetscErrorCode MPIPetsc_Type_compare(MPI_Datatype,MPI_Datatype,PetscBool*);
PETSC_EXTERN PetscErrorCode MPIPetsc_Type_compare_contig(MPI_Datatype,MPI_Datatype,PetscInt*);

#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
#define MPIU_Iscatter(a,b,c,d,e,f,g,h,req)     MPI_Iscatter(a,b,c,d,e,f,g,h,req)
#define MPIU_Iscatterv(a,b,c,d,e,f,g,h,i,req)  MPI_Iscatterv(a,b,c,d,e,f,g,h,i,req)
#define MPIU_Igather(a,b,c,d,e,f,g,h,req)      MPI_Igather(a,b,c,d,e,f,g,h,req)
#define MPIU_Igatherv(a,b,c,d,e,f,g,h,i,req)   MPI_Igatherv(a,b,c,d,e,f,g,h,i,req)
#define MPIU_Iallgather(a,b,c,d,e,f,g,req)     MPI_Iallgather(a,b,c,d,e,f,g,req)
#define MPIU_Iallgatherv(a,b,c,d,e,f,g,h,req)  MPI_Iallgatherv(a,b,c,d,e,f,g,h,req)
#define MPIU_Ialltoall(a,b,c,d,e,f,g,req)      MPI_Ialltoall(a,b,c,d,e,f,g,req)
#else
/* Ignore req, the MPI_Request argument, and fall back to the blocking MPI collectives. Callers should
   initialize req to MPI_REQUEST_NULL so that MPI_Wait(req,status) is valid regardless of whether the
   call was blocking or nonblocking.
 */
#define MPIU_Iscatter(a,b,c,d,e,f,g,h,req)     MPI_Scatter(a,b,c,d,e,f,g,h)
#define MPIU_Iscatterv(a,b,c,d,e,f,g,h,i,req)  MPI_Scatterv(a,b,c,d,e,f,g,h,i)
#define MPIU_Igather(a,b,c,d,e,f,g,h,req)      MPI_Gather(a,b,c,d,e,f,g,h)
#define MPIU_Igatherv(a,b,c,d,e,f,g,h,i,req)   MPI_Gatherv(a,b,c,d,e,f,g,h,i)
#define MPIU_Iallgather(a,b,c,d,e,f,g,req)     MPI_Allgather(a,b,c,d,e,f,g)
#define MPIU_Iallgatherv(a,b,c,d,e,f,g,h,req)  MPI_Allgatherv(a,b,c,d,e,f,g,h)
#define MPIU_Ialltoall(a,b,c,d,e,f,g,req)      MPI_Alltoall(a,b,c,d,e,f,g)
#endif
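
Because the fallback branch ignores req, callers are expected to initialize it to MPI_REQUEST_NULL; MPI_Wait on a null request is a no-op, so the same call sequence works whether or not nonblocking collectives are available. A sketched caller (variable names are illustrative):

  MPI_Comm       comm = PETSC_COMM_WORLD;
  PetscMPIInt    rank,size,sendval,*recvbuf;
  MPI_Request    req = MPI_REQUEST_NULL;  /* so the MPI_Wait below is harmless if the blocking fallback was used */
  PetscErrorCode ierr;

  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&recvbuf);CHKERRQ(ierr);
  sendval = rank;
  ierr = MPIU_Iallgather(&sendval,1,MPI_INT,recvbuf,1,MPI_INT,comm,&req);CHKERRQ(ierr);
  /* ... overlap unrelated work here ... */
  ierr = MPI_Wait(&req,MPI_STATUS_IGNORE);CHKERRQ(ierr);
  ierr = PetscFree(recvbuf);CHKERRQ(ierr);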

PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *mtype)
{
  *mtype = PETSC_MEMTYPE_HOST;
#if defined(PETSC_HAVE_CUDA)
  if (use_gpu_aware_mpi) {
    struct cudaPointerAttributes attr;
    if (data) {
#if (CUDART_VERSION < 10000)
      attr.memoryType = cudaMemoryTypeHost;
      cudaPointerGetAttributes(&attr,data);
      cudaGetLastError();
      if (attr.memoryType == cudaMemoryTypeDevice) *mtype = PETSC_MEMTYPE_DEVICE;
#else
      attr.type = cudaMemoryTypeHost;
      cudaPointerGetAttributes(&attr,data); /* Do not check the return code: before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */
      cudaGetLastError(); /* Get and thereby clear the last error */
      if (attr.type == cudaMemoryTypeDevice || attr.type == cudaMemoryTypeManaged) *mtype = PETSC_MEMTYPE_DEVICE;
#endif
    }
  }
#endif
  return(0);
}
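
Communication code can use this helper to decide whether a user-supplied buffer lives in host or device memory. A brief sketch, where rootdata is assumed to be a caller-provided pointer:

  PetscMemType   rootmtype;
  PetscErrorCode ierr;

  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  if (rootmtype == PETSC_MEMTYPE_DEVICE) {
    /* pack/unpack kernels and (GPU-aware) MPI can work on the device buffer directly */
  } else {
    /* host path */
  }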

PETSC_STATIC_INLINE PetscErrorCode PetscMallocWithMemType(PetscMemType mtype,size_t size,void** ptr)
{
  if (mtype == PETSC_MEMTYPE_HOST) {PetscErrorCode ierr = PetscMalloc(size,ptr);CHKERRQ(ierr);}
#if defined(PETSC_HAVE_CUDA)
  else if (mtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaMalloc(ptr,size);CHKERRCUDA(err);}
#endif
  else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong PetscMemType %d", (int)mtype);
  return(0);
}

PETSC_STATIC_INLINE PetscErrorCode PetscFreeWithMemType_Private(PetscMemType mtype,void* ptr)
{
  if (mtype == PETSC_MEMTYPE_HOST) {PetscErrorCode ierr = PetscFree(ptr);CHKERRQ(ierr);}
#if defined(PETSC_HAVE_CUDA)
  else if (mtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaFree(ptr);CHKERRCUDA(err);}
#endif
  else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong PetscMemType %d",(int)mtype);
  return(0);
}

/* Free memory and set ptr to NULL on success */
#define PetscFreeWithMemType(t,p) ((p) && (PetscFreeWithMemType_Private((t),(p)) || ((p)=0,0)))

PETSC_STATIC_INLINE PetscErrorCode PetscMemcpyWithMemType(PetscMemType dstmtype,PetscMemType srcmtype,void* dst,const void*src,size_t n)
{
  if (n) {
    if (dstmtype == PETSC_MEMTYPE_HOST && srcmtype == PETSC_MEMTYPE_HOST) {PetscErrorCode ierr = PetscMemcpy(dst,src,n);CHKERRQ(ierr);}
#if defined(PETSC_HAVE_CUDA)
    else if (dstmtype == PETSC_MEMTYPE_DEVICE && srcmtype == PETSC_MEMTYPE_HOST)   {cudaError_t err = cudaMemcpy(dst,src,n,cudaMemcpyHostToDevice);CHKERRCUDA(err);}
    else if (dstmtype == PETSC_MEMTYPE_HOST   && srcmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaMemcpy(dst,src,n,cudaMemcpyDeviceToHost);CHKERRCUDA(err);}
    else if (dstmtype == PETSC_MEMTYPE_DEVICE && srcmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaMemcpy(dst,src,n,cudaMemcpyDeviceToDevice);CHKERRCUDA(err);}
#endif
    else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong PetscMemType for dst %d and src %d",(int)dstmtype,(int)srcmtype);
  }
  return(0);
}
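
Together, the helpers above let packing code allocate, fill, and release a staging buffer in the same memory space as the data it mirrors. A sketch, assuming n, leafmtype, and leafdata come from the surrounding code:

  void          *buf = NULL;
  PetscErrorCode ierr;

  ierr = PetscMallocWithMemType(leafmtype,n*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
  ierr = PetscMemcpyWithMemType(leafmtype,leafmtype,buf,leafdata,n*sizeof(PetscScalar));CHKERRQ(ierr);
  /* ... hand buf to pack kernels or to MPI ... */
  ierr = PetscFreeWithMemType(leafmtype,buf);CHKERRQ(ierr); /* frees buf and sets it to NULL on success */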

#endif