/* petscdevice_cuda.h */
1: #ifndef PETSCDEVICE_CUDA_H
2: #define PETSCDEVICE_CUDA_H
4: #include <petscdevice.h>
5: #include <petscpkg_version.h>
7: #if defined(__NVCC__) || defined(__CUDACC__)
8: #define PETSC_USING_NVCC 1
9: #endif
11: #if PetscDefined(HAVE_CUDA)
12: #include <cuda.h>
13: #include <cuda_runtime.h>
14: #include <cublas_v2.h>
15: #include <cusolverDn.h>
16: #include <cusolverSp.h>
17: #include <cufft.h>
/* cuBLAS does not have cublasGetErrorName(). We create one on our own. */
PETSC_EXTERN const char *PetscCUBLASGetErrorName(cublasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRCUBLAS macro */
/* Same idea for cuSolver and cuFFT: map a library status code to a human-readable name for error reporting */
PETSC_EXTERN const char *PetscCUSolverGetErrorName(cusolverStatus_t);
PETSC_EXTERN const char *PetscCUFFTGetErrorName(cufftResult);
/* REMOVE ME (slated for deletion): blocks the host until all previously issued device work completes */
#define WaitForCUDA() cudaDeviceSynchronize()
27: /* CUDART_VERSION = 1000 x major + 10 x minor version */
29: /* Could not find exactly which CUDART_VERSION introduced cudaGetErrorName. At least it was in CUDA 8.0 (Sep. 2016) */
30: #if PETSC_PKG_CUDA_VERSION_GE(8, 0, 0)
31: #define PetscCallCUDAVoid(...) \
32: do { \
33: const cudaError_t _p_cuda_err__ = __VA_ARGS__; \
35: } while (0)
37: #define PetscCallCUDA(...) \
38: do { \
39: const cudaError_t _p_cuda_err__ = __VA_ARGS__; \
41: } while (0)
42: #else /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */
43: #define PetscCallCUDA(...) \
44: do { \
45: const cudaError_t _p_cuda_err__ = __VA_ARGS__; \
47: } while (0)
49: #define PetscCallCUDAVoid(...) \
50: do { \
51: const cudaError_t _p_cuda_err__ = __VA_ARGS__; \
53: } while (0)
54: #endif /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */
55: #define CHKERRCUDA(...) PetscCallCUDA(__VA_ARGS__)
/* Check the error state surrounding a kernel launch. Previously the return values of
   cudaGetLastError() and cudaDeviceSynchronize() were discarded, so both bad launch
   configurations and in-kernel faults went unnoticed; route them through PetscCallCUDA. */
#define PetscCUDACheckLaunch \
  do { \
    /* Check synchronous errors, i.e. pre-launch */ \
    PetscCallCUDA(cudaGetLastError()); \
    /* Check asynchronous errors, i.e. kernel failed (ULF) */ \
    PetscCallCUDA(cudaDeviceSynchronize()); \
  } while (0)
/* Check a cublasStatus_t returned by a cuBLAS call and convert failures into PETSc errors.
   NOT_INITIALIZED / ALLOC_FAILED statuses raised after the CUDA device was successfully
   initialized are classified as PETSC_ERR_GPU_RESOURCE (GPU likely out of resources);
   every other failure is reported as a generic PETSC_ERR_GPU. */
#define PetscCallCUBLAS(...) \
  do { \
    const cublasStatus_t _p_cublas_stat__ = __VA_ARGS__; \
    if (PetscUnlikely(_p_cublas_stat__ != CUBLAS_STATUS_SUCCESS)) { \
      const char *name = PetscCUBLASGetErrorName(_p_cublas_stat__); \
      if (((_p_cublas_stat__ == CUBLAS_STATUS_NOT_INITIALIZED) || (_p_cublas_stat__ == CUBLAS_STATUS_ALLOC_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
                "cuBLAS error %d (%s). " \
                "Reports not initialized or alloc failed; " \
                "this indicates the GPU may have run out resources", \
                (PetscErrorCode)_p_cublas_stat__, name); \
      } else { \
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuBLAS error %d (%s)", (PetscErrorCode)_p_cublas_stat__, name); \
      } \
    } \
  } while (0)
/* Older-style alias retained for backward compatibility */
#define CHKERRCUBLAS(...) PetscCallCUBLAS(__VA_ARGS__)
#if (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) /* According to cuda/10.1.168 on OLCF Summit */
/* Check a cusparseStatus_t and convert failures into PETSc errors. The previous text had a
   dangling SETERRQ message with no enclosing condition (the resource-classification branch
   was missing), leaving the macro syntactically broken; restore it in the same shape as the
   sibling cuBLAS/cuSolver macros: NOT_INITIALIZED / ALLOC_FAILED after successful device
   initialization -> PETSC_ERR_GPU_RESOURCE, anything else -> PETSC_ERR_GPU. */
#define PetscCallCUSPARSE(...) \
  do { \
    const cusparseStatus_t _p_cusparse_stat__ = __VA_ARGS__; \
    if (PetscUnlikely(_p_cusparse_stat__)) { \
      const char *name  = cusparseGetErrorName(_p_cusparse_stat__); \
      const char *descr = cusparseGetErrorString(_p_cusparse_stat__); \
      if (((_p_cusparse_stat__ == CUSPARSE_STATUS_NOT_INITIALIZED) || (_p_cusparse_stat__ == CUSPARSE_STATUS_ALLOC_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
                "cuSPARSE errorcode %d (%s) : %s.; " \
                "this indicates the GPU has run out resources", \
                (int)_p_cusparse_stat__, name, descr); \
      } else { \
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuSPARSE errorcode %d (%s) : %s", (int)_p_cusparse_stat__, name, descr); \
      } \
    } \
  } while (0)
#else /* (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) */
/* cusparseGetErrorName()/cusparseGetErrorString() are unavailable here; previously the status
   was stored and never tested, so failures were silently ignored — report the numeric code */
#define PetscCallCUSPARSE(...) \
  do { \
    const cusparseStatus_t _p_cusparse_stat__ = __VA_ARGS__; \
    PetscCheck(_p_cusparse_stat__ == CUSPARSE_STATUS_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuSPARSE errorcode %d", (PetscErrorCode)_p_cusparse_stat__); \
  } while (0)
#endif /* (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) */
#define CHKERRCUSPARSE(...) PetscCallCUSPARSE(__VA_ARGS__)
/* Check a cusolverStatus_t returned by a cuSolver call; failures become PETSc errors.
   NOT_INITIALIZED / ALLOC_FAILED / INTERNAL_ERROR occurring after the CUDA device was
   initialized are flagged as PETSC_ERR_GPU_RESOURCE; everything else as PETSC_ERR_GPU. */
#define PetscCallCUSOLVER(...) \
  do { \
    const cusolverStatus_t _p_cusolver_err__ = __VA_ARGS__; \
    if (PetscUnlikely(_p_cusolver_err__ != CUSOLVER_STATUS_SUCCESS)) { \
      const char *name = PetscCUSolverGetErrorName(_p_cusolver_err__); \
      /* Generic GPU error unless this looks like resource exhaustion on an initialized device */ \
      if (((_p_cusolver_err__ != CUSOLVER_STATUS_NOT_INITIALIZED) && (_p_cusolver_err__ != CUSOLVER_STATUS_ALLOC_FAILED) && (_p_cusolver_err__ != CUSOLVER_STATUS_INTERNAL_ERROR)) || !PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuSolver error %d (%s)", (PetscErrorCode)_p_cusolver_err__, name); \
      } else { \
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
                "cuSolver error %d (%s). " \
                "This indicates the GPU may have run out resources", \
                (PetscErrorCode)_p_cusolver_err__, name); \
      } \
    } \
  } while (0)
/* Older-style alias retained for backward compatibility */
#define CHKERRCUSOLVER(...) PetscCallCUSOLVER(__VA_ARGS__)
/* Check a cufftResult returned by a cuFFT call and convert failures into PETSc errors.
   CUFFT_SETUP_FAILED / CUFFT_ALLOC_FAILED after successful device initialization are
   classified as PETSC_ERR_GPU_RESOURCE; anything else as PETSC_ERR_GPU.
   NOTE(review): the resource message says "not initialized or alloc failed" while the
   condition tests CUFFT_SETUP_FAILED — confirm the wording matches the intended statuses. */
#define PetscCallCUFFT(...) \
  do { \
    const cufftResult_t _p_cufft_stat__ = __VA_ARGS__; \
    if (PetscUnlikely(_p_cufft_stat__ != CUFFT_SUCCESS)) { \
      const char *name = PetscCUFFTGetErrorName(_p_cufft_stat__); \
      if (((_p_cufft_stat__ == CUFFT_SETUP_FAILED) || (_p_cufft_stat__ == CUFFT_ALLOC_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
                "cuFFT error %d (%s). " \
                "Reports not initialized or alloc failed; " \
                "this indicates the GPU has run out resources", \
                (PetscErrorCode)_p_cufft_stat__, name); \
      } else { \
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuFFT error %d (%s)", (PetscErrorCode)_p_cufft_stat__, name); \
      } \
    } \
  } while (0)
/* Older-style alias retained for backward compatibility */
#define CHKERRCUFFT(...) PetscCallCUFFT(__VA_ARGS__)
/* Check a curandStatus_t returned by a cuRAND call; failures become PETSc errors.
   INITIALIZATION_FAILED / ALLOCATION_FAILED after successful device initialization are
   classified as PETSC_ERR_GPU_RESOURCE; anything else as PETSC_ERR_GPU. cuRAND provides
   no error-name API, so only the numeric status code is reported.
   Fix: the generic branch spelled the product "cuRand" while the resource branch used
   "cuRAND" — the messages now use the same spelling. */
#define PetscCallCURAND(...) \
  do { \
    const curandStatus_t _p_curand_stat__ = __VA_ARGS__; \
    if (PetscUnlikely(_p_curand_stat__ != CURAND_STATUS_SUCCESS)) { \
      if (((_p_curand_stat__ == CURAND_STATUS_INITIALIZATION_FAILED) || (_p_curand_stat__ == CURAND_STATUS_ALLOCATION_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
                "cuRAND error %d. " \
                "Reports not initialized or alloc failed; " \
                "this indicates the GPU has run out resources", \
                (PetscErrorCode)_p_curand_stat__); \
      } else { \
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuRAND error %d", (PetscErrorCode)_p_curand_stat__); \
      } \
    } \
  } while (0)
#define CHKERRCURAND(...) PetscCallCURAND(__VA_ARGS__)
PETSC_EXTERN cudaStream_t PetscDefaultCudaStream; // The default stream used by PETSc
// Getters for PETSc-managed cuBLAS / cuSolverDn handles (defined elsewhere in the library);
// presumably these return a shared per-process handle — confirm in the implementation.
PETSC_EXTERN PetscErrorCode PetscCUBLASGetHandle(cublasHandle_t *);
PETSC_EXTERN PetscErrorCode PetscCUSOLVERDnGetHandle(cusolverDnHandle_t *);
162: #endif // PETSC_HAVE_CUDA
// these can also be defined in petscdevice_hip.h
#ifndef PETSC_DEVICE_DEFINED_DECLS_PRIVATE
#define PETSC_DEVICE_DEFINED_DECLS_PRIVATE
#if PetscDefined(USING_NVCC)
// Compiling with nvcc: map the portability macros onto the real CUDA declaration specifiers
#define PETSC_HOST_DECL __host__
#define PETSC_DEVICE_DECL __device__
#define PETSC_KERNEL_DECL __global__
#define PETSC_SHAREDMEM_DECL __shared__
#define PETSC_FORCEINLINE __forceinline__
#define PETSC_CONSTMEM_DECL __constant__
#else
// Host-only compiler: the CUDA qualifiers expand to nothing and forced inlining
// degrades to plain C/C++ `inline`
#define PETSC_HOST_DECL
#define PETSC_DEVICE_DECL
#define PETSC_KERNEL_DECL
#define PETSC_SHAREDMEM_DECL
#define PETSC_FORCEINLINE inline
#define PETSC_CONSTMEM_DECL
#endif // PETSC_USING_NVCC

// Convenience combinations of the above
#define PETSC_HOSTDEVICE_DECL PETSC_HOST_DECL PETSC_DEVICE_DECL
#define PETSC_DEVICE_INLINE_DECL PETSC_DEVICE_DECL PETSC_FORCEINLINE
#define PETSC_HOSTDEVICE_INLINE_DECL PETSC_HOSTDEVICE_DECL PETSC_FORCEINLINE
#endif // PETSC_DEVICE_DEFINED_DECLS_PRIVATE
188: #endif // PETSCDEVICE_CUDA_H