Actual source code: petscdevice.h

  1: #if !defined(PETSCDEVICE_H)
  2: #define PETSCDEVICE_H

  4: #include <petscsys.h>
  5: #include <petscdevicetypes.h>
  6: #include <petscpkg_version.h>

  8: #if defined(PETSC_HAVE_CUDA)
  9: #include <cuda.h>
 10: #include <cuda_runtime.h>
 11: #include <cublas_v2.h>
 12: #include <cusolverDn.h>
 13: #include <cusolverSp.h>
 14: #include <cufft.h>

 16: /* cuBLAS does not have cublasGetErrorName(). We create one on our own; the sibling helpers below do the same job for cuSOLVER and cuFFT status values. */
 17: PETSC_EXTERN const char* PetscCUBLASGetErrorName(cublasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRCUBLAS macro */
 18: PETSC_EXTERN const char* PetscCUSolverGetErrorName(cusolverStatus_t); /* called from the PetscCallCUSOLVER macro in this header */
 19: PETSC_EXTERN const char* PetscCUFFTGetErrorName(cufftResult); /* called from the PetscCallCUFFT macro in this header */

 21: /* REMOVE ME: legacy shorthand that expands to a bare cudaDeviceSynchronize() call; the macro itself performs no error checking, so wrap it in PetscCallCUDA() */
 22: #define WaitForCUDA() cudaDeviceSynchronize()

 24: /* CUDART_VERSION = 1000 x major + 10 x minor version */

 26: /* Could not find exactly which CUDART_VERSION introduced cudaGetErrorName. At least it was in CUDA 8.0 (Sep. 2016). Both variants below evaluate the wrapped call once and invoke SETERRQ with PETSC_ERR_GPU on failure. */
 27: #if PETSC_PKG_CUDA_VERSION_GE(8,0,0)
 28: #define PetscCallCUDA(...) do { /* CUDA >= 8.0: report code, name and description */ \
 29:     const cudaError_t _p_cuda_err__ = __VA_ARGS__; /* the wrapped call is evaluated only here */ \
 30:     if (PetscUnlikely(_p_cuda_err__ != cudaSuccess)) {                  \
 31:       const char *name  = cudaGetErrorName(_p_cuda_err__);              \
 32:       const char *descr = cudaGetErrorString(_p_cuda_err__);            \
 33:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuda error %d (%s) : %s",  \
 34:               (PetscErrorCode)_p_cuda_err__,name,descr);                \
 35:     }                                                                   \
 36:   } while (0)
 37: #else /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */
 38: #define PetscCallCUDA(...) do { /* older CUDA: cudaGetErrorName is not relied on, report the numeric code only */ \
 39:   const cudaError_t _p_cuda_err__ = __VA_ARGS__;                                               \
 40:   if (PetscUnlikely(_p_cuda_err__ != cudaSuccess)) { SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuda error %d",(PetscErrorCode)_p_cuda_err__); } /* without this check the status was stored and silently dropped */ \
 41: } while (0)
 42: #endif /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */
 43: #define CHKERRCUDA(...) PetscCallCUDA(__VA_ARGS__) /* alias of PetscCallCUDA */

 45: #define PetscCallCUBLAS(...) do { /* run a cuBLAS call and invoke SETERRQ if it does not return CUBLAS_STATUS_SUCCESS */ \
 46:     const cublasStatus_t _p_cublas_stat__ = __VA_ARGS__; /* the wrapped call is evaluated only here */ \
 47:     if (PetscUnlikely(_p_cublas_stat__ != CUBLAS_STATUS_SUCCESS)) {     \
 48:       const char *name = PetscCUBLASGetErrorName(_p_cublas_stat__);     \
 49:       if (((_p_cublas_stat__ == CUBLAS_STATUS_NOT_INITIALIZED) ||       \
 50:            (_p_cublas_stat__ == CUBLAS_STATUS_ALLOC_FAILED))   &&       \
 51:           PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { /* PETSc reports the CUDA device as initialized, so classify as PETSC_ERR_GPU_RESOURCE */ \
 52:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE,                 \
 53:                 "cuBLAS error %d (%s). "                                \
 54:                 "Reports not initialized or alloc failed; "             \
 55:                 "this indicates the GPU may have run out resources",    \
 56:                 (PetscErrorCode)_p_cublas_stat__,name);                 \
 57:       } else { /* every other failure is a generic PETSC_ERR_GPU */    \
 58:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuBLAS error %d (%s)",   \
 59:                 (PetscErrorCode)_p_cublas_stat__,name);                 \
 60:       }                                                                 \
 61:     }                                                                   \
 62:   } while (0)
 63: #define CHKERRCUBLAS(...) PetscCallCUBLAS(__VA_ARGS__) /* alias of PetscCallCUBLAS */

 65: #if (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) /* According to cuda/10.1.168 on OLCF Summit */
 66: #define PetscCallCUSPARSE(...) /* run a cuSPARSE call and invoke SETERRQ with name and description on a nonzero status */ \
 67: do {\
 68:   const cusparseStatus_t _p_cusparse_stat__ = __VA_ARGS__; /* the wrapped call is evaluated only here */ \
 69:   if (PetscUnlikely(_p_cusparse_stat__)) {\
 70:     const char *name  = cusparseGetErrorName(_p_cusparse_stat__);\
 71:     const char *descr = cusparseGetErrorString(_p_cusparse_stat__);\
 72:     if (((_p_cusparse_stat__ == CUSPARSE_STATUS_NOT_INITIALIZED) || (_p_cusparse_stat__ == CUSPARSE_STATUS_ALLOC_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE,"cuSPARSE errorcode %d (%s) : %s. Reports not initialized or alloc failed; this indicates the GPU may have run out of resources",(int)_p_cusparse_stat__,name,descr); } /* same resource classification as the cuBLAS/cuFFT macros; gives the following else its if */ \
 73:     else { SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuSPARSE errorcode %d (%s) : %s",(int)_p_cusparse_stat__,name,descr); }\
 74:   }\
 75: } while (0)
 76: #else  /* (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) */
 77: #define PetscCallCUSPARSE(...) do { /* older cuSPARSE: the name/description helpers are not relied on, report the numeric code only */ \
 78:   const cusparseStatus_t _p_cusparse_stat__ = __VA_ARGS__; \
 79:   if (PetscUnlikely(_p_cusparse_stat__ != CUSPARSE_STATUS_SUCCESS)) { SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuSPARSE errorcode %d",(int)_p_cusparse_stat__); } /* without this check the status was stored and silently dropped */ \
 80:   } while (0)
 81: #endif /* (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) */
 82: #define CHKERRCUSPARSE(...) PetscCallCUSPARSE(__VA_ARGS__) /* alias of PetscCallCUSPARSE */

 84: #define PetscCallCUSOLVER(...) do { /* run a cuSOLVER call and invoke SETERRQ if it does not return CUSOLVER_STATUS_SUCCESS */ \
 85:     const cusolverStatus_t _p_cusolver_stat__ = __VA_ARGS__; /* the wrapped call is evaluated only here */ \
 86:     if (PetscUnlikely(_p_cusolver_stat__ != CUSOLVER_STATUS_SUCCESS)) { \
 87:       const char *name = PetscCUSolverGetErrorName(_p_cusolver_stat__); \
 88:       if (((_p_cusolver_stat__ == CUSOLVER_STATUS_NOT_INITIALIZED) ||   \
 89:            (_p_cusolver_stat__ == CUSOLVER_STATUS_ALLOC_FAILED)    ||   \
 90:            (_p_cusolver_stat__ == CUSOLVER_STATUS_INTERNAL_ERROR)) &&   \
 91:           PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { /* unlike the cuBLAS macro, INTERNAL_ERROR is also classified as a resource problem here */ \
 92:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE,                 \
 93:                 "cuSolver error %d (%s). "                              \
 94:                 "This indicates the GPU may have run out resources",    \
 95:                 (PetscErrorCode)_p_cusolver_stat__,name);               \
 96:       } else { /* every other failure is a generic PETSC_ERR_GPU */    \
 97:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuSolver error %d (%s)", \
 98:                 (PetscErrorCode)_p_cusolver_stat__,name);               \
 99:       }                                                                 \
100:     }                                                                   \
101:   } while (0)
102: #define CHKERRCUSOLVER(...) PetscCallCUSOLVER(__VA_ARGS__) /* alias of PetscCallCUSOLVER */

104: #define PetscCallCUFFT(...)   do { /* run a cuFFT call and invoke SETERRQ if it does not return CUFFT_SUCCESS */ \
105:     const cufftResult_t _p_cufft_stat__ = __VA_ARGS__; /* the wrapped call is evaluated only here */ \
106:     if (PetscUnlikely(_p_cufft_stat__ != CUFFT_SUCCESS)) {              \
107:       const char *name = PetscCUFFTGetErrorName(_p_cufft_stat__);       \
108:       if (((_p_cufft_stat__ == CUFFT_SETUP_FAILED)  ||                  \
109:            (_p_cufft_stat__ == CUFFT_ALLOC_FAILED)) &&                  \
110:           PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { /* PETSc reports the CUDA device as initialized, so classify as PETSC_ERR_GPU_RESOURCE */ \
111:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE,                 \
112:                 "cuFFT error %d (%s). "                                 \
113:                 "Reports not initialized or alloc failed; "             \
114:                 "this indicates the GPU has run out resources",         \
115:                 (PetscErrorCode)_p_cufft_stat__,name);                  \
116:       } else { /* every other failure is a generic PETSC_ERR_GPU */    \
117:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuFFT error %d (%s)",    \
118:                 (PetscErrorCode)_p_cufft_stat__,name);                  \
119:       }                                                                 \
120:     }                                                                   \
121:   } while (0)
122: #define CHKERRCUFFT(...) PetscCallCUFFT(__VA_ARGS__) /* alias of PetscCallCUFFT */

124: #define PetscCallCURAND(...)  do { /* run a cuRAND call and invoke SETERRQ if it does not return CURAND_STATUS_SUCCESS; NOTE(review): no cuRAND header is included in the visible CUDA section, presumably users include <curand.h> before expanding this - confirm */ \
125:     const curandStatus_t _p_curand_stat__ = __VA_ARGS__; /* the wrapped call is evaluated only here */ \
126:     if (PetscUnlikely(_p_curand_stat__ != CURAND_STATUS_SUCCESS)) {     \
127:       if (((_p_curand_stat__ == CURAND_STATUS_INITIALIZATION_FAILED) || \
128:            (_p_curand_stat__ == CURAND_STATUS_ALLOCATION_FAILED))    && \
129:           PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { /* PETSc reports the CUDA device as initialized, so classify as PETSC_ERR_GPU_RESOURCE */ \
130:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE,                 \
131:                 "cuRAND error %d. "                                     \
132:                 "Reports not initialized or alloc failed; "             \
133:                 "this indicates the GPU has run out resources",         \
134:                 (PetscErrorCode)_p_curand_stat__);                      \
135:       } else { /* only the numeric code is printed: no name helper is used for cuRAND */ \
136:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,                          \
137:                 "cuRand error %d",(PetscErrorCode)_p_curand_stat__);    \
138:       }                                                                 \
139:     }                                                                   \
140:   } while (0)
141: #define CHKERRCURAND(...) PetscCallCURAND(__VA_ARGS__) /* alias of PetscCallCURAND */

143: PETSC_EXTERN cudaStream_t   PetscDefaultCudaStream; /* The default stream used by PETSc */

145: PETSC_EXTERN PetscErrorCode PetscCUBLASGetHandle(cublasHandle_t*); /* presumably writes a cuBLAS handle through the pointer - confirm against the implementation */
146: PETSC_EXTERN PetscErrorCode PetscCUSOLVERDnGetHandle(cusolverDnHandle_t*); /* presumably writes a cuSOLVER dense handle through the pointer - confirm against the implementation */
147: #endif /* PETSC_HAVE_CUDA */

149: #if defined(PETSC_HAVE_HIP)
150: #include <hip/hip_runtime.h>
151: #include <hipblas.h>
152: #if defined(__HIP_PLATFORM_NVCC__)
153: #include <cusolverDn.h>
154: #else /* __HIP_PLATFORM_HCC__ */
155: #include <rocsolver.h>
156: #endif /* __HIP_PLATFORM_NVCC__ */

158: /* REMOVE ME: legacy shorthand that expands to a bare hipDeviceSynchronize() call; the macro itself performs no error checking, so wrap it in PetscCallHIP() */
159: #define WaitForHIP() hipDeviceSynchronize()

161: /* hipBLAS does not have hipblasGetErrorName(). We create one on our own; it maps a hipblasStatus_t to the name string printed by PetscCallHIPBLAS. */
162: PETSC_EXTERN const char* PetscHIPBLASGetErrorName(hipblasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPBLAS macro */

164: #define PetscCallHIP(...)     do { /* run a HIP runtime call and invoke SETERRQ if it does not return hipSuccess */ \
165:     const hipError_t _p_hip_err__ = __VA_ARGS__; /* the wrapped call is evaluated only here */ \
166:     if (PetscUnlikely(_p_hip_err__ != hipSuccess)) {                    \
167:       const char *name  = hipGetErrorName(_p_hip_err__);                \
168:       const char *descr = hipGetErrorString(_p_hip_err__);              \
169:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"hip error %d (%s) : %s",   \
170:               (PetscErrorCode)_p_hip_err__,name,descr);                 \
171:     }                                                                   \
172:   } while (0)
173: #define CHKERRHIP(...) PetscCallHIP(__VA_ARGS__) /* alias of PetscCallHIP */

175: #define PetscCallHIPBLAS(...) do { /* run a hipBLAS call and invoke SETERRQ if it does not return HIPBLAS_STATUS_SUCCESS */ \
176:     const hipblasStatus_t _p_hipblas_stat__ = __VA_ARGS__; /* the wrapped call is evaluated only here */ \
177:     if (PetscUnlikely(_p_hipblas_stat__ != HIPBLAS_STATUS_SUCCESS)) {   \
178:       const char *name = PetscHIPBLASGetErrorName(_p_hipblas_stat__);   \
179:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"hipBLAS error %d (%s)",    \
180:               (PetscErrorCode)_p_hipblas_stat__,name);                  \
181:     }                                                                   \
182:   } while (0)
183: #define CHKERRHIPBLAS(...) PetscCallHIPBLAS(__VA_ARGS__) /* alias of PetscCallHIPBLAS */

185: /* TODO: SEK:  Need to figure out the hipsolver issues. Until then this macro runs the wrapped call once and invokes SETERRQ with the numeric code on any nonzero status. */
186: #define PetscCallHIPSOLVER(...) do { \
187:     const hipsolverStatus_t _p_hipsolver_stat__ = __VA_ARGS__; \
188:     if (PetscUnlikely(_p_hipsolver_stat__)) { SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"HIPSOLVER error %d",(PetscErrorCode)_p_hipsolver_stat__); } /* hipsolverStatus_t aliases cusolverStatus_t or rocblas_status in this header; both are assumed to use 0 for success - without this check the status was silently dropped */ \
189:   } while (0)
190: #define CHKERRHIPSOLVER(...) PetscCallHIPSOLVER(__VA_ARGS__) /* alias of PetscCallHIPSOLVER */

192: /* hipSolver does not exist yet so we work around it
193:  rocSOLVER uses rocBLAS for the handle
194:  * */
195: #if defined(__HIP_PLATFORM_NVCC__)
196: typedef cusolverDnHandle_t hipsolverHandle_t; /* on the NVCC platform the hipsolver handle is the cuSOLVER dense handle */
197: typedef cusolverStatus_t   hipsolverStatus_t; /* on the NVCC platform hipsolver status codes are cuSOLVER status codes */

199: /* Alias hipsolverDestroy to cusolverDnDestroy. Takes a pointer to the handle and destroys the handle it points to; NOTE(review): the ROCm variant below takes the handle by value - confirm which form callers expect. */
200: static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t *hipsolverhandle)
201: {
202:   return cusolverDnDestroy(*hipsolverhandle); /* cusolverDnDestroy takes the handle itself, not its address */
203: }

205: /* Alias hipsolverCreate to cusolverDnCreate: forwards the handle pointer unchanged and returns the cuSOLVER status as-is */
206: static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
207: {
208:   return cusolverDnCreate(hipsolverhandle);
209: }

211: /* Alias hipsolverGetStream to cusolverDnGetStream: forwards handle and stream pointer unchanged and returns the cuSOLVER status as-is */
212: static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
213: {
214:   return cusolverDnGetStream(handle,stream);
215: }

217: /* Alias hipsolverSetStream to cusolverDnSetStream: forwards handle and stream unchanged and returns the cuSOLVER status as-is */
218: static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
219: {
220:   return cusolverDnSetStream(handle,stream); /* was misspelled cusolveDnSetStream, which names no cuSOLVER function */
221: }
222: #else /* __HIP_PLATFORM_HCC__ */
223: typedef rocblas_handle hipsolverHandle_t; /* on the ROCm platform the hipsolver handle is a rocBLAS handle */
224: typedef rocblas_status hipsolverStatus_t; /* on the ROCm platform hipsolver status codes are rocBLAS status codes */

226: /* Alias hipsolverDestroy to rocblas_destroy_handle: takes the handle by value, forwards it unchanged and returns the rocBLAS status as-is */
227: static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t  hipsolverhandle)
228: {
229:   return rocblas_destroy_handle(hipsolverhandle);
230: }

232: /* Alias hipsolverCreate to rocblas_create_handle: forwards the handle pointer unchanged and returns the rocBLAS status as-is */
233: static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
234: {
235:   return rocblas_create_handle(hipsolverhandle);
236: }

238: /* Alias hipsolverGetStream to rocblas_get_stream: forwards handle and stream pointer unchanged and returns the rocBLAS status as-is */
239: static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
240: {
241:   return rocblas_get_stream(handle,stream);
242: }

244: /* Alias hipsolverSetStream to rocblas_set_stream: forwards handle and stream unchanged and returns the rocBLAS status as-is */
245: static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
246: {
247:   return rocblas_set_stream(handle,stream);
248: }
249: #endif /* __HIP_PLATFORM_NVCC__ */
250: PETSC_EXTERN hipStream_t    PetscDefaultHipStream; /* The default stream used by PETSc */

252: PETSC_EXTERN PetscErrorCode PetscHIPBLASGetHandle(hipblasHandle_t*); /* presumably writes a hipBLAS handle through the pointer - confirm against the implementation */
253: PETSC_EXTERN PetscErrorCode PetscHIPSOLVERGetHandle(hipsolverHandle_t*); /* presumably writes a hipsolver handle (see the typedefs in this header) through the pointer - confirm against the implementation */
254: #endif /* PETSC_HAVE_HIP */

256: /* Cannot use the device context api without C++: every prototype in this section is only declared when PETSC_HAVE_CXX is defined */
257: #if defined(PETSC_HAVE_CXX)
258: PETSC_EXTERN PetscErrorCode PetscDeviceInitializePackage(void);
259: PETSC_EXTERN PetscErrorCode PetscDeviceFinalizePackage(void);

261: /* PetscDevice: note that PetscDeviceInitialized() is referenced by the PetscCallCU* error macros in this header, so those macros need this declaration to be visible where they are expanded */
262: PETSC_EXTERN PetscErrorCode PetscDeviceInitialize(PetscDeviceType);
263: PETSC_EXTERN PetscBool      PetscDeviceInitialized(PetscDeviceType);
264: PETSC_EXTERN PetscErrorCode PetscDeviceCreate(PetscDeviceType,PetscInt,PetscDevice*);
265: PETSC_EXTERN PetscErrorCode PetscDeviceConfigure(PetscDevice);
266: PETSC_EXTERN PetscErrorCode PetscDeviceView(PetscDevice,PetscViewer);
267: PETSC_EXTERN PetscErrorCode PetscDeviceDestroy(PetscDevice*);
268: PETSC_EXTERN PetscErrorCode PetscDeviceGetDeviceId(PetscDevice,PetscInt*);

270: /* PetscDeviceContext: creation/destruction, device and stream-type accessors, fork/join, synchronization and current-context management */
271: PETSC_EXTERN PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext*);
272: PETSC_EXTERN PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext*);
273: PETSC_EXTERN PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext,PetscDevice);
274: PETSC_EXTERN PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext,PetscDevice*);
275: PETSC_EXTERN PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext,PetscStreamType);
276: PETSC_EXTERN PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext,PetscStreamType*);
277: PETSC_EXTERN PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext);
278: PETSC_EXTERN PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext,PetscDeviceContext*);
279: PETSC_EXTERN PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext,PetscBool*);
280: PETSC_EXTERN PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext,PetscDeviceContext);
281: PETSC_EXTERN PetscErrorCode PetscDeviceContextFork(PetscDeviceContext,PetscInt,PetscDeviceContext**);
282: PETSC_EXTERN PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext,PetscInt,PetscDeviceContextJoinMode,PetscDeviceContext**);
283: PETSC_EXTERN PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext);
284: PETSC_EXTERN PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext*);
285: PETSC_EXTERN PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext);
286: PETSC_EXTERN PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm,const char[],PetscDeviceContext);
287: #endif /* PETSC_HAVE_CXX */

289: #endif /* PETSCDEVICE_H */