Actual source code: petscdevice_cuda.h

  1: #ifndef PETSCDEVICE_CUDA_H
  2: #define PETSCDEVICE_CUDA_H

  4: #include <petscdevice.h>
  5: #include <petscpkg_version.h>

  7: #if defined(__NVCC__) || defined(__CUDACC__)
  8:   #define PETSC_USING_NVCC 1
  9: #endif

 11: #if PetscDefined(HAVE_CUDA)
 12:   #include <cuda.h>
 13:   #include <cuda_runtime.h>
 14:   #include <cublas_v2.h>
 15:   #include <cusolverDn.h>
 16:   #include <cusolverSp.h>
 17:   #include <cufft.h>

 19: /* cuBLAS does not have cublasGetErrorName(). We create one on our own. */
 20: PETSC_EXTERN const char *PetscCUBLASGetErrorName(cublasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRCUBLAS macro */
 21: PETSC_EXTERN const char *PetscCUSolverGetErrorName(cusolverStatus_t);
 22: PETSC_EXTERN const char *PetscCUFFTGetErrorName(cufftResult);

 24:   /* REMOVE ME */
 25:   #define WaitForCUDA() cudaDeviceSynchronize()

 27:   /* CUDART_VERSION = 1000 x major + 10 x minor version */

 29:   /* Could not find exactly which CUDART_VERSION introduced cudaGetErrorName. At least it was in CUDA 8.0 (Sep. 2016) */
 30:   #if PETSC_PKG_CUDA_VERSION_GE(8, 0, 0)
 31:     #define PetscCallCUDAVoid(...) \
 32:       do { \
 33:         const cudaError_t _p_cuda_err__ = __VA_ARGS__; \
 35:       } while (0)

 37:     #define PetscCallCUDA(...) \
 38:       do { \
 39:         const cudaError_t _p_cuda_err__ = __VA_ARGS__; \
 41:       } while (0)
 42:   #else /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */
 43:     #define PetscCallCUDA(...) \
 44:       do { \
 45:         const cudaError_t _p_cuda_err__ = __VA_ARGS__; \
 47:       } while (0)

 49:     #define PetscCallCUDAVoid(...) \
 50:       do { \
 51:         const cudaError_t _p_cuda_err__ = __VA_ARGS__; \
 53:       } while (0)
 54:   #endif /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */
 55:   #define CHKERRCUDA(...) PetscCallCUDA(__VA_ARGS__)

 57:   #define PetscCUDACheckLaunch \
 58:     do { \
 59:       /* Check synchronous errors, i.e. pre-launch */ \
 60:       cudaGetLastError(); \
 61:       /* Check asynchronous errors, i.e. kernel failed (ULF) */ \
 62:       cudaDeviceSynchronize(); \
 63:     } while (0)

 65:   #define PetscCallCUBLAS(...) \
 66:     do { \
 67:       const cublasStatus_t _p_cublas_stat__ = __VA_ARGS__; \
 68:       if (PetscUnlikely(_p_cublas_stat__ != CUBLAS_STATUS_SUCCESS)) { \
 69:         const char *name = PetscCUBLASGetErrorName(_p_cublas_stat__); \
 70:         if (((_p_cublas_stat__ == CUBLAS_STATUS_NOT_INITIALIZED) || (_p_cublas_stat__ == CUBLAS_STATUS_ALLOC_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \
 71:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
 72:                   "cuBLAS error %d (%s). " \
 73:                   "Reports not initialized or alloc failed; " \
 74:                   "this indicates the GPU may have run out resources", \
 75:                   (PetscErrorCode)_p_cublas_stat__, name); \
 76:         } else { \
 77:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuBLAS error %d (%s)", (PetscErrorCode)_p_cublas_stat__, name); \
 78:         } \
 79:       } \
 80:     } while (0)
 81:   #define CHKERRCUBLAS(...) PetscCallCUBLAS(__VA_ARGS__)

 83:   #if (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) /* According to cuda/10.1.168 on OLCF Summit */
 84:     #define PetscCallCUSPARSE(...) \
 85:       do { \
 86:         const cusparseStatus_t _p_cusparse_stat__ = __VA_ARGS__; \
 87:         if (PetscUnlikely(_p_cusparse_stat__)) { \
 88:           const char *name  = cusparseGetErrorName(_p_cusparse_stat__); \
 89:           const char *descr = cusparseGetErrorString(_p_cusparse_stat__); \
 91:                      "cuSPARSE errorcode %d (%s) : %s.; " \
 92:                      "this indicates the GPU has run out resources", \
 93:                      (int)_p_cusparse_stat__, name, descr); \
 94:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuSPARSE errorcode %d (%s) : %s", (int)_p_cusparse_stat__, name, descr); \
 95:         } \
 96:       } while (0)
 97:   #else /* (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) */
 98:     #define PetscCallCUSPARSE(...) \
 99:       do { \
100:         const cusparseStatus_t _p_cusparse_stat__ = __VA_ARGS__; \
102:       } while (0)
103:   #endif /* (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) */
104:   #define CHKERRCUSPARSE(...) PetscCallCUSPARSE(__VA_ARGS__)

106:   #define PetscCallCUSOLVER(...) \
107:     do { \
108:       const cusolverStatus_t _p_cusolver_stat__ = __VA_ARGS__; \
109:       if (PetscUnlikely(_p_cusolver_stat__ != CUSOLVER_STATUS_SUCCESS)) { \
110:         const char *name = PetscCUSolverGetErrorName(_p_cusolver_stat__); \
111:         if (((_p_cusolver_stat__ == CUSOLVER_STATUS_NOT_INITIALIZED) || (_p_cusolver_stat__ == CUSOLVER_STATUS_ALLOC_FAILED) || (_p_cusolver_stat__ == CUSOLVER_STATUS_INTERNAL_ERROR)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \
112:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
113:                   "cuSolver error %d (%s). " \
114:                   "This indicates the GPU may have run out resources", \
115:                   (PetscErrorCode)_p_cusolver_stat__, name); \
116:         } else { \
117:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuSolver error %d (%s)", (PetscErrorCode)_p_cusolver_stat__, name); \
118:         } \
119:       } \
120:     } while (0)
121:   #define CHKERRCUSOLVER(...) PetscCallCUSOLVER(__VA_ARGS__)

123:   #define PetscCallCUFFT(...) \
124:     do { \
125:       const cufftResult_t _p_cufft_stat__ = __VA_ARGS__; \
126:       if (PetscUnlikely(_p_cufft_stat__ != CUFFT_SUCCESS)) { \
127:         const char *name = PetscCUFFTGetErrorName(_p_cufft_stat__); \
128:         if (((_p_cufft_stat__ == CUFFT_SETUP_FAILED) || (_p_cufft_stat__ == CUFFT_ALLOC_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \
129:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
130:                   "cuFFT error %d (%s). " \
131:                   "Reports not initialized or alloc failed; " \
132:                   "this indicates the GPU has run out resources", \
133:                   (PetscErrorCode)_p_cufft_stat__, name); \
134:         } else { \
135:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuFFT error %d (%s)", (PetscErrorCode)_p_cufft_stat__, name); \
136:         } \
137:       } \
138:     } while (0)
139:   #define CHKERRCUFFT(...) PetscCallCUFFT(__VA_ARGS__)

141:   #define PetscCallCURAND(...) \
142:     do { \
143:       const curandStatus_t _p_curand_stat__ = __VA_ARGS__; \
144:       if (PetscUnlikely(_p_curand_stat__ != CURAND_STATUS_SUCCESS)) { \
145:         if (((_p_curand_stat__ == CURAND_STATUS_INITIALIZATION_FAILED) || (_p_curand_stat__ == CURAND_STATUS_ALLOCATION_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \
146:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
147:                   "cuRAND error %d. " \
148:                   "Reports not initialized or alloc failed; " \
149:                   "this indicates the GPU has run out resources", \
150:                   (PetscErrorCode)_p_curand_stat__); \
151:         } else { \
152:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuRand error %d", (PetscErrorCode)_p_curand_stat__); \
153:         } \
154:       } \
155:     } while (0)
156:   #define CHKERRCURAND(...) PetscCallCURAND(__VA_ARGS__)

158: PETSC_EXTERN cudaStream_t   PetscDefaultCudaStream; // The default stream used by PETSc
159: PETSC_EXTERN PetscErrorCode PetscCUBLASGetHandle(cublasHandle_t *);
160: PETSC_EXTERN PetscErrorCode PetscCUSOLVERDnGetHandle(cusolverDnHandle_t *);

162: #endif // PETSC_HAVE_CUDA

164: // these can also be defined in petscdevice_hip.h
165: #ifndef PETSC_DEVICE_DEFINED_DECLS_PRIVATE
166:   #define PETSC_DEVICE_DEFINED_DECLS_PRIVATE
167:   #if PetscDefined(USING_NVCC)
168:     #define PETSC_HOST_DECL      __host__
169:     #define PETSC_DEVICE_DECL    __device__
170:     #define PETSC_KERNEL_DECL    __global__
171:     #define PETSC_SHAREDMEM_DECL __shared__
172:     #define PETSC_FORCEINLINE    __forceinline__
173:     #define PETSC_CONSTMEM_DECL  __constant__
174:   #else
175:     #define PETSC_HOST_DECL
176:     #define PETSC_DEVICE_DECL
177:     #define PETSC_KERNEL_DECL
178:     #define PETSC_SHAREDMEM_DECL
179:     #define PETSC_FORCEINLINE inline
180:     #define PETSC_CONSTMEM_DECL
181:   #endif // PETSC_USING_NVCC

183:   #define PETSC_HOSTDEVICE_DECL        PETSC_HOST_DECL PETSC_DEVICE_DECL
184:   #define PETSC_DEVICE_INLINE_DECL     PETSC_DEVICE_DECL PETSC_FORCEINLINE
185:   #define PETSC_HOSTDEVICE_INLINE_DECL PETSC_HOSTDEVICE_DECL PETSC_FORCEINLINE
186: #endif // PETSC_DEVICE_DEFINED_DECLS_PRIVATE

188: #endif // PETSCDEVICE_CUDA_H