xref: /petsc/src/sys/objects/device/impls/sycl/syclcontext.sycl.cxx (revision ab4ee011827fbbb77f789779c813f6db6bb0cbfa)
10e6b6b59SJacob Faibussowitsch #include "sycldevice.hpp"
2a2158755SJunchao Zhang #include <CL/sycl.hpp>
3*ab4ee011SJunchao Zhang #include <Kokkos_Core.hpp>
4a2158755SJunchao Zhang 
5d71ae5a4SJacob Faibussowitsch namespace Petsc
6d71ae5a4SJacob Faibussowitsch {
7a2158755SJunchao Zhang 
8d71ae5a4SJacob Faibussowitsch namespace device
9d71ae5a4SJacob Faibussowitsch {
1017f48955SJacob Faibussowitsch 
11d71ae5a4SJacob Faibussowitsch namespace sycl
12d71ae5a4SJacob Faibussowitsch {
1317f48955SJacob Faibussowitsch 
14d71ae5a4SJacob Faibussowitsch namespace impl
15d71ae5a4SJacob Faibussowitsch {
1617f48955SJacob Faibussowitsch 
179371c9d4SSatish Balay class DeviceContext {
18a2158755SJunchao Zhang public:
19*ab4ee011SJunchao Zhang   struct PetscDeviceContext_SYCL {
200e6b6b59SJacob Faibussowitsch     ::sycl::event event;
210e6b6b59SJacob Faibussowitsch     ::sycl::event begin;   // timer-only
220e6b6b59SJacob Faibussowitsch     ::sycl::event end;     // timer-only
23*ab4ee011SJunchao Zhang     Kokkos::Timer timer{}; // use cpu time since sycl events are return value of queue submission and we have no infrastructure to store them
24*ab4ee011SJunchao Zhang     double        timeBegin{};
25a2158755SJunchao Zhang #if PetscDefined(USE_DEBUG)
26*ab4ee011SJunchao Zhang     PetscBool timerInUse{};
27a2158755SJunchao Zhang #endif
28*ab4ee011SJunchao Zhang     ::sycl::queue queue;
29a2158755SJunchao Zhang   };
30a2158755SJunchao Zhang 
31a2158755SJunchao Zhang private:
32a2158755SJunchao Zhang   static bool initialized_;
33a2158755SJunchao Zhang 
34089fb57cSJacob Faibussowitsch   static PetscErrorCode finalize_() noexcept
35d71ae5a4SJacob Faibussowitsch   {
36a2158755SJunchao Zhang     PetscFunctionBegin;
37a2158755SJunchao Zhang     initialized_ = false;
383ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
39a2158755SJunchao Zhang   }
40a2158755SJunchao Zhang 
41*ab4ee011SJunchao Zhang   static PetscErrorCode initialize_(PetscInt id, PetscDeviceContext dctx) noexcept
42d71ae5a4SJacob Faibussowitsch   {
43a2158755SJunchao Zhang     PetscFunctionBegin;
449566063dSJacob Faibussowitsch     PetscCall(PetscDeviceCheckDeviceCount_Internal(id));
45a2158755SJunchao Zhang     if (!initialized_) {
46a2158755SJunchao Zhang       initialized_ = true;
479566063dSJacob Faibussowitsch       PetscCall(PetscRegisterFinalize(finalize_));
48a2158755SJunchao Zhang     }
493ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
50a2158755SJunchao Zhang   }
51a2158755SJunchao Zhang 
52a2158755SJunchao Zhang public:
533f675fcfSPierre Jolivet   const struct _DeviceContextOps ops = {destroy, changeStreamType, setUp, query, waitForContext, synchronize, getBlasHandle, getSolverHandle, getStreamHandle, beginTimer, endTimer, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr};
54a2158755SJunchao Zhang 
55a2158755SJunchao Zhang   // default constructor
5617f48955SJacob Faibussowitsch   DeviceContext() noexcept = default;
57a2158755SJunchao Zhang 
58a2158755SJunchao Zhang   // All of these functions MUST be static in order to be callable from C, otherwise they
59a2158755SJunchao Zhang   // get the implicit 'this' pointer tacked on
60089fb57cSJacob Faibussowitsch   static PetscErrorCode destroy(PetscDeviceContext dctx) noexcept
61d71ae5a4SJacob Faibussowitsch   {
62a2158755SJunchao Zhang     PetscFunctionBegin;
63*ab4ee011SJunchao Zhang     delete static_cast<PetscDeviceContext_SYCL *>(dctx->data);
64a2158755SJunchao Zhang     dctx->data = nullptr;
653ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
66a2158755SJunchao Zhang   };
6731d47070SJunchao Zhang 
6831d47070SJunchao Zhang   static PetscErrorCode setUp(PetscDeviceContext dctx) noexcept
6931d47070SJunchao Zhang   {
7031d47070SJunchao Zhang     PetscFunctionBegin;
7131d47070SJunchao Zhang #if PetscDefined(USE_DEBUG)
7231d47070SJunchao Zhang     static_cast<PetscDeviceContext_SYCL *>(dctx->data)->timerInUse = PETSC_FALSE;
7331d47070SJunchao Zhang #endif
7431d47070SJunchao Zhang     // petsc/sycl currently only uses Kokkos's default execution space (and its queue),
7531d47070SJunchao Zhang     // so in some sense, we have only one petsc device context.
7631d47070SJunchao Zhang     PetscCall(PetscKokkosInitializeCheck());
7731d47070SJunchao Zhang     static_cast<PetscDeviceContext_SYCL *>(dctx->data)->queue = Kokkos::DefaultExecutionSpace().sycl_queue();
7831d47070SJunchao Zhang     PetscFunctionReturn(PETSC_SUCCESS);
7931d47070SJunchao Zhang   };
8031d47070SJunchao Zhang 
8131d47070SJunchao Zhang   static PetscErrorCode query(PetscDeviceContext dctx, PetscBool *idle) noexcept
8231d47070SJunchao Zhang   {
8331d47070SJunchao Zhang     PetscFunctionBegin;
8431d47070SJunchao Zhang     // available in future, https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/supported/sycl_ext_oneapi_queue_empty.asciidoc
8531d47070SJunchao Zhang     // *idle = static_cast<PetscDeviceContext_SYCL*>(dctx->data)->queue.empty() ? PETSC_TRUE : PETSC_FALSE;
8631d47070SJunchao Zhang     *idle = PETSC_FALSE;
8731d47070SJunchao Zhang     PetscFunctionReturn(PETSC_SUCCESS);
8831d47070SJunchao Zhang   };
8931d47070SJunchao Zhang 
9031d47070SJunchao Zhang   static PetscErrorCode synchronize(PetscDeviceContext dctx) noexcept
9131d47070SJunchao Zhang   {
9231d47070SJunchao Zhang     PetscBool  idle = PETSC_TRUE;
9331d47070SJunchao Zhang     const auto dci  = static_cast<PetscDeviceContext_SYCL *>(dctx->data);
9431d47070SJunchao Zhang 
9531d47070SJunchao Zhang     PetscFunctionBegin;
9631d47070SJunchao Zhang     PetscCall(query(dctx, &idle));
9731d47070SJunchao Zhang     if (!idle) PetscCallCXX(dci->queue.wait());
9831d47070SJunchao Zhang     PetscFunctionReturn(PETSC_SUCCESS);
9931d47070SJunchao Zhang   };
10031d47070SJunchao Zhang 
10131d47070SJunchao Zhang   static PetscErrorCode getStreamHandle(PetscDeviceContext dctx, void *handle) noexcept
10231d47070SJunchao Zhang   {
10331d47070SJunchao Zhang     PetscFunctionBegin;
10431d47070SJunchao Zhang     *static_cast<::sycl::queue **>(handle) = &(static_cast<PetscDeviceContext_SYCL *>(dctx->data)->queue);
10531d47070SJunchao Zhang     PetscFunctionReturn(PETSC_SUCCESS);
10631d47070SJunchao Zhang   };
10731d47070SJunchao Zhang 
10831d47070SJunchao Zhang   static PetscErrorCode beginTimer(PetscDeviceContext dctx) noexcept
10931d47070SJunchao Zhang   {
11031d47070SJunchao Zhang     const auto dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data);
11131d47070SJunchao Zhang 
11231d47070SJunchao Zhang     PetscFunctionBegin;
11331d47070SJunchao Zhang #if PetscDefined(USE_DEBUG)
11431d47070SJunchao Zhang     PetscCheck(!dci->timerInUse, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Forgot to call PetscLogGpuTimeEnd()?");
11531d47070SJunchao Zhang     dci->timerInUse = PETSC_TRUE;
11631d47070SJunchao Zhang #endif
11731d47070SJunchao Zhang     PetscCallCXX(dci->timeBegin = dci->timer.seconds());
11831d47070SJunchao Zhang     PetscFunctionReturn(PETSC_SUCCESS);
11931d47070SJunchao Zhang   };
12031d47070SJunchao Zhang 
12131d47070SJunchao Zhang   static PetscErrorCode endTimer(PetscDeviceContext dctx, PetscLogDouble *elapsed) noexcept
12231d47070SJunchao Zhang   {
12331d47070SJunchao Zhang     const auto dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data);
12431d47070SJunchao Zhang 
12531d47070SJunchao Zhang     PetscFunctionBegin;
12631d47070SJunchao Zhang #if PetscDefined(USE_DEBUG)
12731d47070SJunchao Zhang     PetscCheck(dci->timerInUse, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Forgot to call PetscLogGpuTimeBegin()?");
12831d47070SJunchao Zhang     dci->timerInUse = PETSC_FALSE;
12931d47070SJunchao Zhang #endif
13031d47070SJunchao Zhang     PetscCallCXX(dci->queue.wait());
13131d47070SJunchao Zhang     PetscCallCXX(*elapsed = dci->timer.seconds() - dci->timeBegin);
13231d47070SJunchao Zhang     PetscFunctionReturn(PETSC_SUCCESS);
13331d47070SJunchao Zhang   };
13431d47070SJunchao Zhang 
135089fb57cSJacob Faibussowitsch   static PetscErrorCode changeStreamType(PetscDeviceContext, PetscStreamType) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); };
136089fb57cSJacob Faibussowitsch   static PetscErrorCode waitForContext(PetscDeviceContext, PetscDeviceContext) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); };
137089fb57cSJacob Faibussowitsch   static PetscErrorCode getBlasHandle(PetscDeviceContext, void *) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); };
138089fb57cSJacob Faibussowitsch   static PetscErrorCode getSolverHandle(PetscDeviceContext, void *) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); };
139a2158755SJunchao Zhang };
140a2158755SJunchao Zhang 
1410e6b6b59SJacob Faibussowitsch } // namespace impl
14217f48955SJacob Faibussowitsch 
1430e6b6b59SJacob Faibussowitsch } // namespace sycl
14417f48955SJacob Faibussowitsch 
1450e6b6b59SJacob Faibussowitsch } // namespace device
14617f48955SJacob Faibussowitsch 
147a2158755SJunchao Zhang } // namespace Petsc
148a2158755SJunchao Zhang 
149d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscDeviceContextCreate_SYCL(PetscDeviceContext dctx)
150d71ae5a4SJacob Faibussowitsch {
1510e6b6b59SJacob Faibussowitsch   using namespace Petsc::device::sycl::impl;
15217f48955SJacob Faibussowitsch 
15317f48955SJacob Faibussowitsch   static const DeviceContext syclctx;
154a2158755SJunchao Zhang 
155a2158755SJunchao Zhang   PetscFunctionBegin;
156*ab4ee011SJunchao Zhang   PetscCallCXX(dctx->data = new DeviceContext::PetscDeviceContext_SYCL());
1579566063dSJacob Faibussowitsch   PetscCall(PetscMemcpy(dctx->ops, &syclctx.ops, sizeof(syclctx.ops)));
1583ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
159a2158755SJunchao Zhang }
160