Actual source code: petscdevice_hip.h
1: #pragma once
3: #include <petscdevice.h>
4: #include <petscpkg_version.h>
6: #if defined(__HCC__) || (defined(__clang__) && defined(__HIP__))
7: #define PETSC_USING_HCC 1
8: #endif
10: #if PetscDefined(HAVE_HIP)
11: #include <hip/hip_runtime.h>
13: #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0)
14: #include <hipblas/hipblas.h>
15: #include <hipsparse/hipsparse.h>
16: #else
17: #include <hipblas.h>
18: #include <hipsparse.h>
19: #endif
21: #if PETSC_PKG_HIP_VERSION_LT(5, 4, 0)
22: #define HIPSPARSE_ORDER_COL HIPSPARSE_ORDER_COLUMN
23: #endif
25: #if defined(__HIP_PLATFORM_NVCC__)
26: #include <cusolverDn.h>
27: #else // __HIP_PLATFORM_HCC__
28: #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0)
29: #include <hipsolver/hipsolver.h>
30: #else
31: #include <hipsolver.h>
32: #endif
33: #endif // __HIP_PLATFORM_NVCC__
34: #include <hip/hip_complex.h> // for hipComplex, hipDoubleComplex
36: // REMOVE ME
37: #define WaitForHIP() hipDeviceSynchronize()
39: /* hipBLAS, hipSPARSE and hipSolver does not have hip*GetErrorName(). We create one on our own. */
40: PETSC_EXTERN const char *PetscHIPBLASGetErrorName(hipblasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPBLAS macro */
41: PETSC_EXTERN const char *PetscHIPSPARSEGetErrorName(hipsparseStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSPARSE macro */
42: PETSC_EXTERN const char *PetscHIPSolverGetErrorName(hipsolverStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSOLVER macro */
44: #define PetscCallHIP(...) \
45: do { \
46: const hipError_t _p_hip_err__ = __VA_ARGS__; \
47: if (PetscUnlikely(_p_hip_err__ != hipSuccess)) { \
48: const char *name = hipGetErrorName(_p_hip_err__); \
49: const char *descr = hipGetErrorString(_p_hip_err__); \
50: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hip error %d (%s) : %s", (PetscErrorCode)_p_hip_err__, name, descr); \
51: } \
52: } while (0)
53: #define CHKERRHIP(...) PetscCallHIP(__VA_ARGS__)
55: #define PetscHIPCheckLaunch \
56: do { \
57: /* Check synchronous errors, i.e. pre-launch */ \
58: PetscCallHIP(hipGetLastError()); \
59: /* Check asynchronous errors, i.e. kernel failed (ULF) */ \
60: PetscCallHIP(hipDeviceSynchronize()); \
61: } while (0)
63: #define PetscCallHIPBLAS(...) \
64: do { \
65: const hipblasStatus_t _p_hipblas_stat__ = __VA_ARGS__; \
66: if (PetscUnlikely(_p_hipblas_stat__ != HIPBLAS_STATUS_SUCCESS)) { \
67: const char *name = PetscHIPBLASGetErrorName(_p_hipblas_stat__); \
68: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipBLAS error %d (%s)", (PetscErrorCode)_p_hipblas_stat__, name); \
69: } \
70: } while (0)
71: #define CHKERRHIPBLAS(...) PetscCallHIPBLAS(__VA_ARGS__)
73: #if PETSC_PKG_HIP_VERSION_GE(4, 5, 0)
74: /* HIPSPARSE & HIPSOLVER have better functionality with ROCm-4.5 or newer */
75: #define PetscCallHIPSPARSE(...) \
76: do { \
77: const hipsparseStatus_t _p_hipsparse_stat__ = __VA_ARGS__; \
78: if (PetscUnlikely(_p_hipsparse_stat__ != HIPSPARSE_STATUS_SUCCESS)) { \
79: const char *name = PetscHIPSPARSEGetErrorName(_p_hipsparse_stat__); \
80: PetscCheck((_p_hipsparse_stat__ != HIPSPARSE_STATUS_NOT_INITIALIZED) && (_p_hipsparse_stat__ != HIPSPARSE_STATUS_ALLOC_FAILED), PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, "hipSPARSE errorcode %d (%s): Reports not initialized or alloc failed; this indicates the GPU has run out resources", (int)_p_hipsparse_stat__, name); \
81: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSPARSE errorcode %d (%s)", (int)_p_hipsparse_stat__, name); \
82: } \
83: } while (0)
84: #define CHKERRHIPSPARSE(...) PetscCallHIPSPARSE(__VA_ARGS__)
86: #define PetscCallHIPSOLVER(...) \
87: do { \
88: const hipsolverStatus_t _p_hipsolver_stat__ = __VA_ARGS__; \
89: if (PetscUnlikely(_p_hipsolver_stat__ != HIPSOLVER_STATUS_SUCCESS)) { \
90: const char *name = PetscHIPSolverGetErrorName(_p_hipsolver_stat__); \
91: if (((_p_hipsolver_stat__ == HIPSOLVER_STATUS_NOT_INITIALIZED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_ALLOC_FAILED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_INTERNAL_ERROR)) && PetscDeviceInitialized(PETSC_DEVICE_HIP)) { \
92: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
93: "hipSolver error %d (%s). " \
94: "This indicates the GPU may have run out resources", \
95: (PetscErrorCode)_p_hipsolver_stat__, name); \
96: } else { \
97: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSolver error %d (%s)", (PetscErrorCode)_p_hipsolver_stat__, name); \
98: } \
99: } \
100: } while (0)
101: #define CHKERRHIPSOLVER(...) PetscCallHIPSOLVER(__VA_ARGS__)
103: #else /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */
104: /* hipSolver does not exist yet so we work around it
105: rocSOLVER users rocBLAS for the handle
106: * */
107: #if defined(__HIP_PLATFORM_NVCC__)
108: #include <cusolverDn.h>
109: typedef cusolverDnHandle_t hipsolverHandle_t;
110: typedef cusolverStatus_t hipsolverStatus_t;
112: /* Alias hipsolverDestroy to cusolverDnDestroy */
113: static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t *hipsolverhandle)
114: {
115: return cusolverDnDestroy(hipsolverhandle);
116: }
118: /* Alias hipsolverCreate to cusolverDnCreate */
119: static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
120: {
121: return cusolverDnCreate(hipsolverhandle);
122: }
124: /* Alias hipsolverGetStream to cusolverDnGetStream */
125: static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
126: {
127: return cusolverDnGetStream(handle, stream);
128: }
130: /* Alias hipsolverSetStream to cusolverDnSetStream */
131: static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
132: {
133: return cusolveDnSetStream(handle, stream);
134: }
135: #else /* __HIP_PLATFORM_HCC__ */
136: #include <rocsolver.h>
137: #include <rocblas.h>
138: typedef rocblas_handle hipsolverHandle_t;
139: typedef rocblas_status hipsolverStatus_t;
141: /* Alias hipsolverDestroy to rocblas_destroy_handle */
142: static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t hipsolverhandle)
143: {
144: return rocblas_destroy_handle(hipsolverhandle);
145: }
147: /* Alias hipsolverCreate to rocblas_destroy_handle */
148: static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
149: {
150: return rocblas_create_handle(hipsolverhandle);
151: }
153: // Alias hipsolverGetStream to rocblas_get_stream
154: static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
155: {
156: return rocblas_get_stream(handle, stream);
157: }
159: // Alias hipsolverSetStream to rocblas_set_stream
160: static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
161: {
162: return rocblas_set_stream(handle, stream);
163: }
164: #endif // __HIP_PLATFORM_NVCC__
165: #endif /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */
166: // REMOVE ME
167: PETSC_EXTERN hipStream_t PetscDefaultHipStream; // The default stream used by PETSc
168: PETSC_EXTERN PetscErrorCode PetscHIPBLASGetHandle(hipblasHandle_t *);
169: PETSC_EXTERN PetscErrorCode PetscHIPSOLVERGetHandle(hipsolverHandle_t *);
170: PETSC_EXTERN PetscErrorCode PetscGetCurrentHIPStream(hipStream_t *);
172: #endif // PETSC_HAVE_HIP
174: // these can also be defined in petscdevice_cuda.h so we undef and define them *only* if the
175: // current compiler is HCC. In this case if petscdevice_cuda.h is included first, the macros
176: // would already be defined, but they would be empty since we cannot be using NVCC at the same
177: // time.
178: #if PetscDefined(USING_HCC)
179: #undef PETSC_HOST_DECL
180: #undef PETSC_DEVICE_DECL
181: #undef PETSC_KERNEL_DECL
182: #undef PETSC_SHAREDMEM_DECL
183: #undef PETSC_FORCEINLINE
184: #undef PETSC_CONSTMEM_DECL
186: #define PETSC_HOST_DECL __host__
187: #define PETSC_DEVICE_DECL __device__
188: #define PETSC_KERNEL_DECL __global__
189: #define PETSC_SHAREDMEM_DECL __shared__
190: #define PETSC_FORCEINLINE __forceinline__
191: #define PETSC_CONSTMEM_DECL __constant__
192: #endif
194: #ifndef PETSC_HOST_DECL // use HOST_DECL as canary
195: #define PETSC_HOST_DECL
196: #define PETSC_DEVICE_DECL
197: #define PETSC_KERNEL_DECL
198: #define PETSC_SHAREDMEM_DECL
199: #define PETSC_FORCEINLINE inline
200: #define PETSC_CONSTMEM_DECL
201: #endif
203: #ifndef PETSC_DEVICE_DEFINED_DECLS_PRIVATE
204: #define PETSC_DEVICE_DEFINED_DECLS_PRIVATE
205: #define PETSC_HOSTDEVICE_DECL PETSC_HOST_DECL PETSC_DEVICE_DECL
206: #define PETSC_DEVICE_INLINE_DECL PETSC_DEVICE_DECL PETSC_FORCEINLINE
207: #define PETSC_HOSTDEVICE_INLINE_DECL PETSC_HOSTDEVICE_DECL PETSC_FORCEINLINE
208: #endif