Actual source code: syclcontext.sycl.cxx

  1: #include "sycldevice.hpp"
  2: #include <CL/sycl.hpp>
  3: #include <Kokkos_Core.hpp>

  5: namespace Petsc
  6: {

  8: namespace device
  9: {

 11: namespace sycl
 12: {

 14: namespace impl
 15: {

 17: class DeviceContext {
 18: public:
 19:   struct PetscDeviceContext_SYCL {
 20:     ::sycl::event event;
 21:     ::sycl::event begin;   // timer-only
 22:     ::sycl::event end;     // timer-only
 23:     Kokkos::Timer timer{}; // use cpu time since sycl events are return value of queue submission and we have no infrastructure to store them
 24:     double        timeBegin{};
 25: #if PetscDefined(USE_DEBUG)
 26:     PetscBool timerInUse{};
 27: #endif
 28:     ::sycl::queue queue;
 29:   };

 31: private:
 32:   static bool initialized_;

 34:   static PetscErrorCode finalize_() noexcept
 35:   {
 36:     PetscFunctionBegin;
 37:     initialized_ = false;
 38:     PetscFunctionReturn(PETSC_SUCCESS);
 39:   }

 41:   static PetscErrorCode initialize_(PetscInt id, PetscDeviceContext dctx) noexcept
 42:   {
 43:     PetscFunctionBegin;
 44:     PetscCall(PetscDeviceCheckDeviceCount_Internal(id));
 45:     if (!initialized_) {
 46:       initialized_ = true;
 47:       PetscCall(PetscRegisterFinalize(finalize_));
 48:     }
 49:     PetscFunctionReturn(PETSC_SUCCESS);
 50:   }

 52: public:
 53:   const struct _DeviceContextOps ops = {destroy, changeStreamType, setUp, query, waitForContext, synchronize, getBlasHandle, getSolverHandle, getStreamHandle, beginTimer, endTimer, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr};

 55:   // default constructor
 56:   DeviceContext() noexcept = default;

 58:   // All of these functions MUST be static in order to be callable from C, otherwise they
 59:   // get the implicit 'this' pointer tacked on
 60:   static PetscErrorCode destroy(PetscDeviceContext dctx) noexcept
 61:   {
 62:     PetscFunctionBegin;
 63:     delete static_cast<PetscDeviceContext_SYCL *>(dctx->data);
 64:     dctx->data = nullptr;
 65:     PetscFunctionReturn(PETSC_SUCCESS);
 66:   };

 68:   static PetscErrorCode setUp(PetscDeviceContext dctx) noexcept
 69:   {
 70:     PetscFunctionBegin;
 71: #if PetscDefined(USE_DEBUG)
 72:     static_cast<PetscDeviceContext_SYCL *>(dctx->data)->timerInUse = PETSC_FALSE;
 73: #endif
 74:     // petsc/sycl currently only uses Kokkos's default execution space (and its queue),
 75:     // so in some sense, we have only one petsc device context.
 76:     PetscCall(PetscKokkosInitializeCheck());
 77:     static_cast<PetscDeviceContext_SYCL *>(dctx->data)->queue = Kokkos::DefaultExecutionSpace().sycl_queue();
 78:     PetscFunctionReturn(PETSC_SUCCESS);
 79:   };

 81:   static PetscErrorCode query(PetscDeviceContext dctx, PetscBool *idle) noexcept
 82:   {
 83:     PetscFunctionBegin;
 84:     // available in future, https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/supported/sycl_ext_oneapi_queue_empty.asciidoc
 85:     // *idle = static_cast<PetscDeviceContext_SYCL*>(dctx->data)->queue.empty() ? PETSC_TRUE : PETSC_FALSE;
 86:     *idle = PETSC_FALSE;
 87:     PetscFunctionReturn(PETSC_SUCCESS);
 88:   };

 90:   static PetscErrorCode synchronize(PetscDeviceContext dctx) noexcept
 91:   {
 92:     PetscBool  idle = PETSC_TRUE;
 93:     const auto dci  = static_cast<PetscDeviceContext_SYCL *>(dctx->data);

 95:     PetscFunctionBegin;
 96:     PetscCall(query(dctx, &idle));
 97:     if (!idle) PetscCallCXX(dci->queue.wait());
 98:     PetscFunctionReturn(PETSC_SUCCESS);
 99:   };

101:   static PetscErrorCode getStreamHandle(PetscDeviceContext dctx, void **handle) noexcept
102:   {
103:     PetscFunctionBegin;
104:     *reinterpret_cast<::sycl::queue **>(handle) = &(static_cast<PetscDeviceContext_SYCL *>(dctx->data)->queue);
105:     PetscFunctionReturn(PETSC_SUCCESS);
106:   };

108:   static PetscErrorCode beginTimer(PetscDeviceContext dctx) noexcept
109:   {
110:     const auto dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data);

112:     PetscFunctionBegin;
113: #if PetscDefined(USE_DEBUG)
114:     PetscCheck(!dci->timerInUse, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Forgot to call PetscLogGpuTimeEnd()?");
115:     dci->timerInUse = PETSC_TRUE;
116: #endif
117:     PetscCallCXX(dci->timeBegin = dci->timer.seconds());
118:     PetscFunctionReturn(PETSC_SUCCESS);
119:   };

121:   static PetscErrorCode endTimer(PetscDeviceContext dctx, PetscLogDouble *elapsed) noexcept
122:   {
123:     const auto dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data);

125:     PetscFunctionBegin;
126: #if PetscDefined(USE_DEBUG)
127:     PetscCheck(dci->timerInUse, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Forgot to call PetscLogGpuTimeBegin()?");
128:     dci->timerInUse = PETSC_FALSE;
129: #endif
130:     PetscCallCXX(dci->queue.wait());
131:     PetscCallCXX(*elapsed = dci->timer.seconds() - dci->timeBegin);
132:     PetscFunctionReturn(PETSC_SUCCESS);
133:   };

135:   static PetscErrorCode changeStreamType(PetscDeviceContext, PetscStreamType) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); };
136:   static PetscErrorCode waitForContext(PetscDeviceContext, PetscDeviceContext) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); };
137:   static PetscErrorCode getBlasHandle(PetscDeviceContext, void *) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); };
138:   static PetscErrorCode getSolverHandle(PetscDeviceContext, void *) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); };
139: };

141: } // namespace impl

143: } // namespace sycl

145: } // namespace device

147: } // namespace Petsc

149: PetscErrorCode PetscDeviceContextCreate_SYCL(PetscDeviceContext dctx)
150: {
151:   using namespace Petsc::device::sycl::impl;

153:   static const DeviceContext syclctx;

155:   PetscFunctionBegin;
156:   PetscCallCXX(dctx->data = new DeviceContext::PetscDeviceContext_SYCL());
157:   dctx->ops[0] = syclctx.ops;
158:   PetscFunctionReturn(PETSC_SUCCESS);
159: }