xref: /petsc/src/sys/objects/device/impls/sycl/syclcontext.sycl.cxx (revision f4f49eeac7efa77fffa46b7ff95a3ed169f659ed)
10e6b6b59SJacob Faibussowitsch #include "sycldevice.hpp"
2a2158755SJunchao Zhang #include <CL/sycl.hpp>
3ab4ee011SJunchao Zhang #include <Kokkos_Core.hpp>
4a2158755SJunchao Zhang 
5d71ae5a4SJacob Faibussowitsch namespace Petsc
6d71ae5a4SJacob Faibussowitsch {
7a2158755SJunchao Zhang 
8d71ae5a4SJacob Faibussowitsch namespace device
9d71ae5a4SJacob Faibussowitsch {
1017f48955SJacob Faibussowitsch 
11d71ae5a4SJacob Faibussowitsch namespace sycl
12d71ae5a4SJacob Faibussowitsch {
1317f48955SJacob Faibussowitsch 
14d71ae5a4SJacob Faibussowitsch namespace impl
15d71ae5a4SJacob Faibussowitsch {
1617f48955SJacob Faibussowitsch 
179371c9d4SSatish Balay class DeviceContext {
18a2158755SJunchao Zhang public:
19ab4ee011SJunchao Zhang   struct PetscDeviceContext_SYCL {
200e6b6b59SJacob Faibussowitsch     ::sycl::event event;
210e6b6b59SJacob Faibussowitsch     ::sycl::event begin;   // timer-only
220e6b6b59SJacob Faibussowitsch     ::sycl::event end;     // timer-only
23ab4ee011SJunchao Zhang     Kokkos::Timer timer{}; // use cpu time since sycl events are return value of queue submission and we have no infrastructure to store them
24ab4ee011SJunchao Zhang     double        timeBegin{};
25a2158755SJunchao Zhang #if PetscDefined(USE_DEBUG)
26ab4ee011SJunchao Zhang     PetscBool timerInUse{};
27a2158755SJunchao Zhang #endif
28ab4ee011SJunchao Zhang     ::sycl::queue queue;
29a2158755SJunchao Zhang   };
30a2158755SJunchao Zhang 
31a2158755SJunchao Zhang private:
32a2158755SJunchao Zhang   static bool initialized_;
33a2158755SJunchao Zhang 
34089fb57cSJacob Faibussowitsch   static PetscErrorCode finalize_() noexcept
35d71ae5a4SJacob Faibussowitsch   {
36a2158755SJunchao Zhang     PetscFunctionBegin;
37a2158755SJunchao Zhang     initialized_ = false;
383ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
39a2158755SJunchao Zhang   }
40a2158755SJunchao Zhang 
41ab4ee011SJunchao Zhang   static PetscErrorCode initialize_(PetscInt id, PetscDeviceContext dctx) noexcept
42d71ae5a4SJacob Faibussowitsch   {
43a2158755SJunchao Zhang     PetscFunctionBegin;
449566063dSJacob Faibussowitsch     PetscCall(PetscDeviceCheckDeviceCount_Internal(id));
45a2158755SJunchao Zhang     if (!initialized_) {
46a2158755SJunchao Zhang       initialized_ = true;
479566063dSJacob Faibussowitsch       PetscCall(PetscRegisterFinalize(finalize_));
48a2158755SJunchao Zhang     }
493ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
50a2158755SJunchao Zhang   }
51a2158755SJunchao Zhang 
52a2158755SJunchao Zhang public:
533f675fcfSPierre Jolivet   const struct _DeviceContextOps ops = {destroy, changeStreamType, setUp, query, waitForContext, synchronize, getBlasHandle, getSolverHandle, getStreamHandle, beginTimer, endTimer, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr};
54a2158755SJunchao Zhang 
55a2158755SJunchao Zhang   // default constructor
5617f48955SJacob Faibussowitsch   DeviceContext() noexcept = default;
57a2158755SJunchao Zhang 
58a2158755SJunchao Zhang   // All of these functions MUST be static in order to be callable from C, otherwise they
59a2158755SJunchao Zhang   // get the implicit 'this' pointer tacked on
60089fb57cSJacob Faibussowitsch   static PetscErrorCode destroy(PetscDeviceContext dctx) noexcept
61d71ae5a4SJacob Faibussowitsch   {
62a2158755SJunchao Zhang     PetscFunctionBegin;
63ab4ee011SJunchao Zhang     delete static_cast<PetscDeviceContext_SYCL *>(dctx->data);
64a2158755SJunchao Zhang     dctx->data = nullptr;
653ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
66*f4f49eeaSPierre Jolivet   }
6731d47070SJunchao Zhang 
6831d47070SJunchao Zhang   static PetscErrorCode setUp(PetscDeviceContext dctx) noexcept
6931d47070SJunchao Zhang   {
7031d47070SJunchao Zhang     PetscFunctionBegin;
7131d47070SJunchao Zhang #if PetscDefined(USE_DEBUG)
7231d47070SJunchao Zhang     static_cast<PetscDeviceContext_SYCL *>(dctx->data)->timerInUse = PETSC_FALSE;
7331d47070SJunchao Zhang #endif
7431d47070SJunchao Zhang     // petsc/sycl currently only uses Kokkos's default execution space (and its queue),
7531d47070SJunchao Zhang     // so in some sense, we have only one petsc device context.
7631d47070SJunchao Zhang     PetscCall(PetscKokkosInitializeCheck());
7731d47070SJunchao Zhang     static_cast<PetscDeviceContext_SYCL *>(dctx->data)->queue = Kokkos::DefaultExecutionSpace().sycl_queue();
7831d47070SJunchao Zhang     PetscFunctionReturn(PETSC_SUCCESS);
79*f4f49eeaSPierre Jolivet   }
8031d47070SJunchao Zhang 
8131d47070SJunchao Zhang   static PetscErrorCode query(PetscDeviceContext dctx, PetscBool *idle) noexcept
8231d47070SJunchao Zhang   {
8331d47070SJunchao Zhang     PetscFunctionBegin;
8431d47070SJunchao Zhang     // available in future, https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/supported/sycl_ext_oneapi_queue_empty.asciidoc
8531d47070SJunchao Zhang     // *idle = static_cast<PetscDeviceContext_SYCL*>(dctx->data)->queue.empty() ? PETSC_TRUE : PETSC_FALSE;
8631d47070SJunchao Zhang     *idle = PETSC_FALSE;
8731d47070SJunchao Zhang     PetscFunctionReturn(PETSC_SUCCESS);
88*f4f49eeaSPierre Jolivet   }
8931d47070SJunchao Zhang 
9031d47070SJunchao Zhang   static PetscErrorCode synchronize(PetscDeviceContext dctx) noexcept
9131d47070SJunchao Zhang   {
9231d47070SJunchao Zhang     PetscBool  idle = PETSC_TRUE;
9331d47070SJunchao Zhang     const auto dci  = static_cast<PetscDeviceContext_SYCL *>(dctx->data);
9431d47070SJunchao Zhang 
9531d47070SJunchao Zhang     PetscFunctionBegin;
9631d47070SJunchao Zhang     PetscCall(query(dctx, &idle));
9731d47070SJunchao Zhang     if (!idle) PetscCallCXX(dci->queue.wait());
9831d47070SJunchao Zhang     PetscFunctionReturn(PETSC_SUCCESS);
99*f4f49eeaSPierre Jolivet   }
10031d47070SJunchao Zhang 
10197cd0981SJacob Faibussowitsch   static PetscErrorCode getStreamHandle(PetscDeviceContext dctx, void **handle) noexcept
10231d47070SJunchao Zhang   {
10331d47070SJunchao Zhang     PetscFunctionBegin;
10497cd0981SJacob Faibussowitsch     *reinterpret_cast<::sycl::queue **>(handle) = &(static_cast<PetscDeviceContext_SYCL *>(dctx->data)->queue);
10531d47070SJunchao Zhang     PetscFunctionReturn(PETSC_SUCCESS);
106*f4f49eeaSPierre Jolivet   }
10731d47070SJunchao Zhang 
10831d47070SJunchao Zhang   static PetscErrorCode beginTimer(PetscDeviceContext dctx) noexcept
10931d47070SJunchao Zhang   {
11031d47070SJunchao Zhang     const auto dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data);
11131d47070SJunchao Zhang 
11231d47070SJunchao Zhang     PetscFunctionBegin;
11331d47070SJunchao Zhang #if PetscDefined(USE_DEBUG)
11431d47070SJunchao Zhang     PetscCheck(!dci->timerInUse, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Forgot to call PetscLogGpuTimeEnd()?");
11531d47070SJunchao Zhang     dci->timerInUse = PETSC_TRUE;
11631d47070SJunchao Zhang #endif
11731d47070SJunchao Zhang     PetscCallCXX(dci->timeBegin = dci->timer.seconds());
11831d47070SJunchao Zhang     PetscFunctionReturn(PETSC_SUCCESS);
119*f4f49eeaSPierre Jolivet   }
12031d47070SJunchao Zhang 
12131d47070SJunchao Zhang   static PetscErrorCode endTimer(PetscDeviceContext dctx, PetscLogDouble *elapsed) noexcept
12231d47070SJunchao Zhang   {
12331d47070SJunchao Zhang     const auto dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data);
12431d47070SJunchao Zhang 
12531d47070SJunchao Zhang     PetscFunctionBegin;
12631d47070SJunchao Zhang #if PetscDefined(USE_DEBUG)
12731d47070SJunchao Zhang     PetscCheck(dci->timerInUse, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Forgot to call PetscLogGpuTimeBegin()?");
12831d47070SJunchao Zhang     dci->timerInUse = PETSC_FALSE;
12931d47070SJunchao Zhang #endif
13031d47070SJunchao Zhang     PetscCallCXX(dci->queue.wait());
13131d47070SJunchao Zhang     PetscCallCXX(*elapsed = dci->timer.seconds() - dci->timeBegin);
13231d47070SJunchao Zhang     PetscFunctionReturn(PETSC_SUCCESS);
133*f4f49eeaSPierre Jolivet   }
13431d47070SJunchao Zhang 
135*f4f49eeaSPierre Jolivet   static PetscErrorCode changeStreamType(PetscDeviceContext, PetscStreamType) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); }
136*f4f49eeaSPierre Jolivet   static PetscErrorCode waitForContext(PetscDeviceContext, PetscDeviceContext) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); }
137*f4f49eeaSPierre Jolivet   static PetscErrorCode getBlasHandle(PetscDeviceContext, void *) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); }
138*f4f49eeaSPierre Jolivet   static PetscErrorCode getSolverHandle(PetscDeviceContext, void *) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); }
139a2158755SJunchao Zhang };
140a2158755SJunchao Zhang 
1410e6b6b59SJacob Faibussowitsch } // namespace impl
14217f48955SJacob Faibussowitsch 
1430e6b6b59SJacob Faibussowitsch } // namespace sycl
14417f48955SJacob Faibussowitsch 
1450e6b6b59SJacob Faibussowitsch } // namespace device
14617f48955SJacob Faibussowitsch 
147a2158755SJunchao Zhang } // namespace Petsc
148a2158755SJunchao Zhang 
149d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscDeviceContextCreate_SYCL(PetscDeviceContext dctx)
150d71ae5a4SJacob Faibussowitsch {
1510e6b6b59SJacob Faibussowitsch   using namespace Petsc::device::sycl::impl;
15217f48955SJacob Faibussowitsch 
15317f48955SJacob Faibussowitsch   static const DeviceContext syclctx;
154a2158755SJunchao Zhang 
155a2158755SJunchao Zhang   PetscFunctionBegin;
156ab4ee011SJunchao Zhang   PetscCallCXX(dctx->data = new DeviceContext::PetscDeviceContext_SYCL());
157aea10558SJacob Faibussowitsch   dctx->ops[0] = syclctx.ops;
1583ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
159a2158755SJunchao Zhang }
160