10e6b6b59SJacob Faibussowitsch #include "sycldevice.hpp" 2a2158755SJunchao Zhang #include <CL/sycl.hpp> 3ab4ee011SJunchao Zhang #include <Kokkos_Core.hpp> 4a2158755SJunchao Zhang 5d71ae5a4SJacob Faibussowitsch namespace Petsc 6d71ae5a4SJacob Faibussowitsch { 7a2158755SJunchao Zhang 8d71ae5a4SJacob Faibussowitsch namespace device 9d71ae5a4SJacob Faibussowitsch { 1017f48955SJacob Faibussowitsch 11d71ae5a4SJacob Faibussowitsch namespace sycl 12d71ae5a4SJacob Faibussowitsch { 1317f48955SJacob Faibussowitsch 14d71ae5a4SJacob Faibussowitsch namespace impl 15d71ae5a4SJacob Faibussowitsch { 1617f48955SJacob Faibussowitsch 179371c9d4SSatish Balay class DeviceContext { 18a2158755SJunchao Zhang public: 19ab4ee011SJunchao Zhang struct PetscDeviceContext_SYCL { 200e6b6b59SJacob Faibussowitsch ::sycl::event event; 210e6b6b59SJacob Faibussowitsch ::sycl::event begin; // timer-only 220e6b6b59SJacob Faibussowitsch ::sycl::event end; // timer-only 23ab4ee011SJunchao Zhang Kokkos::Timer timer{}; // use cpu time since sycl events are return value of queue submission and we have no infrastructure to store them 24ab4ee011SJunchao Zhang double timeBegin{}; 25a2158755SJunchao Zhang #if PetscDefined(USE_DEBUG) 26ab4ee011SJunchao Zhang PetscBool timerInUse{}; 27a2158755SJunchao Zhang #endif 28ab4ee011SJunchao Zhang ::sycl::queue queue; 29a2158755SJunchao Zhang }; 30a2158755SJunchao Zhang 31a2158755SJunchao Zhang private: 32a2158755SJunchao Zhang static bool initialized_; 33a2158755SJunchao Zhang 34089fb57cSJacob Faibussowitsch static PetscErrorCode finalize_() noexcept 35d71ae5a4SJacob Faibussowitsch { 36a2158755SJunchao Zhang PetscFunctionBegin; 37a2158755SJunchao Zhang initialized_ = false; 383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 39a2158755SJunchao Zhang } 40a2158755SJunchao Zhang 41ab4ee011SJunchao Zhang static PetscErrorCode initialize_(PetscInt id, PetscDeviceContext dctx) noexcept 42d71ae5a4SJacob Faibussowitsch { 43a2158755SJunchao Zhang PetscFunctionBegin; 449566063dSJacob Faibussowitsch PetscCall(PetscDeviceCheckDeviceCount_Internal(id)); 45a2158755SJunchao Zhang if (!initialized_) { 46a2158755SJunchao Zhang initialized_ = true; 479566063dSJacob Faibussowitsch PetscCall(PetscRegisterFinalize(finalize_)); 48a2158755SJunchao Zhang } 493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 50a2158755SJunchao Zhang } 51a2158755SJunchao Zhang 52a2158755SJunchao Zhang public: 533f675fcfSPierre Jolivet const struct _DeviceContextOps ops = {destroy, changeStreamType, setUp, query, waitForContext, synchronize, getBlasHandle, getSolverHandle, getStreamHandle, beginTimer, endTimer, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr}; 54a2158755SJunchao Zhang 55a2158755SJunchao Zhang // default constructor 5617f48955SJacob Faibussowitsch DeviceContext() noexcept = default; 57a2158755SJunchao Zhang 58a2158755SJunchao Zhang // All of these functions MUST be static in order to be callable from C, otherwise they 59a2158755SJunchao Zhang // get the implicit 'this' pointer tacked on 60089fb57cSJacob Faibussowitsch static PetscErrorCode destroy(PetscDeviceContext dctx) noexcept 61d71ae5a4SJacob Faibussowitsch { 62a2158755SJunchao Zhang PetscFunctionBegin; 63ab4ee011SJunchao Zhang delete static_cast<PetscDeviceContext_SYCL *>(dctx->data); 64a2158755SJunchao Zhang dctx->data = nullptr; 653ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 66*f4f49eeaSPierre Jolivet } 6731d47070SJunchao Zhang 6831d47070SJunchao Zhang static PetscErrorCode setUp(PetscDeviceContext dctx) noexcept 6931d47070SJunchao Zhang { 7031d47070SJunchao Zhang PetscFunctionBegin; 7131d47070SJunchao Zhang #if PetscDefined(USE_DEBUG) 7231d47070SJunchao Zhang static_cast<PetscDeviceContext_SYCL *>(dctx->data)->timerInUse = PETSC_FALSE; 7331d47070SJunchao Zhang #endif 7431d47070SJunchao Zhang // petsc/sycl currently only uses Kokkos's default execution space (and its queue), 7531d47070SJunchao Zhang // so in some sense, we have only one petsc device context. 7631d47070SJunchao Zhang PetscCall(PetscKokkosInitializeCheck()); 7731d47070SJunchao Zhang static_cast<PetscDeviceContext_SYCL *>(dctx->data)->queue = Kokkos::DefaultExecutionSpace().sycl_queue(); 7831d47070SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 79*f4f49eeaSPierre Jolivet } 8031d47070SJunchao Zhang 8131d47070SJunchao Zhang static PetscErrorCode query(PetscDeviceContext dctx, PetscBool *idle) noexcept 8231d47070SJunchao Zhang { 8331d47070SJunchao Zhang PetscFunctionBegin; 8431d47070SJunchao Zhang // available in future, https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/supported/sycl_ext_oneapi_queue_empty.asciidoc 8531d47070SJunchao Zhang // *idle = static_cast<PetscDeviceContext_SYCL*>(dctx->data)->queue.empty() ? PETSC_TRUE : PETSC_FALSE; 8631d47070SJunchao Zhang *idle = PETSC_FALSE; 8731d47070SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 88*f4f49eeaSPierre Jolivet } 8931d47070SJunchao Zhang 9031d47070SJunchao Zhang static PetscErrorCode synchronize(PetscDeviceContext dctx) noexcept 9131d47070SJunchao Zhang { 9231d47070SJunchao Zhang PetscBool idle = PETSC_TRUE; 9331d47070SJunchao Zhang const auto dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data); 9431d47070SJunchao Zhang 9531d47070SJunchao Zhang PetscFunctionBegin; 9631d47070SJunchao Zhang PetscCall(query(dctx, &idle)); 9731d47070SJunchao Zhang if (!idle) PetscCallCXX(dci->queue.wait()); 9831d47070SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 99*f4f49eeaSPierre Jolivet } 10031d47070SJunchao Zhang 10197cd0981SJacob Faibussowitsch static PetscErrorCode getStreamHandle(PetscDeviceContext dctx, void **handle) noexcept 10231d47070SJunchao Zhang { 10331d47070SJunchao Zhang PetscFunctionBegin; 10497cd0981SJacob Faibussowitsch *reinterpret_cast<::sycl::queue **>(handle) = &(static_cast<PetscDeviceContext_SYCL *>(dctx->data)->queue); 10531d47070SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 106*f4f49eeaSPierre Jolivet } 10731d47070SJunchao Zhang 10831d47070SJunchao Zhang static PetscErrorCode beginTimer(PetscDeviceContext dctx) noexcept 10931d47070SJunchao Zhang { 11031d47070SJunchao Zhang const auto dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data); 11131d47070SJunchao Zhang 11231d47070SJunchao Zhang PetscFunctionBegin; 11331d47070SJunchao Zhang #if PetscDefined(USE_DEBUG) 11431d47070SJunchao Zhang PetscCheck(!dci->timerInUse, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Forgot to call PetscLogGpuTimeEnd()?"); 11531d47070SJunchao Zhang dci->timerInUse = PETSC_TRUE; 11631d47070SJunchao Zhang #endif 11731d47070SJunchao Zhang PetscCallCXX(dci->timeBegin = dci->timer.seconds()); 11831d47070SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 119*f4f49eeaSPierre Jolivet } 12031d47070SJunchao Zhang 12131d47070SJunchao Zhang static PetscErrorCode endTimer(PetscDeviceContext dctx, PetscLogDouble *elapsed) noexcept 12231d47070SJunchao Zhang { 12331d47070SJunchao Zhang const auto dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data); 12431d47070SJunchao Zhang 12531d47070SJunchao Zhang PetscFunctionBegin; 12631d47070SJunchao Zhang #if PetscDefined(USE_DEBUG) 12731d47070SJunchao Zhang PetscCheck(dci->timerInUse, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Forgot to call PetscLogGpuTimeBegin()?"); 12831d47070SJunchao Zhang dci->timerInUse = PETSC_FALSE; 12931d47070SJunchao Zhang #endif 13031d47070SJunchao Zhang PetscCallCXX(dci->queue.wait()); 13131d47070SJunchao Zhang PetscCallCXX(*elapsed = dci->timer.seconds() - dci->timeBegin); 13231d47070SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 133*f4f49eeaSPierre Jolivet } 13431d47070SJunchao Zhang 135*f4f49eeaSPierre Jolivet static PetscErrorCode changeStreamType(PetscDeviceContext, PetscStreamType) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); } 136*f4f49eeaSPierre Jolivet static PetscErrorCode waitForContext(PetscDeviceContext, PetscDeviceContext) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); } 137*f4f49eeaSPierre Jolivet static PetscErrorCode getBlasHandle(PetscDeviceContext, void *) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); } 138*f4f49eeaSPierre Jolivet static PetscErrorCode getSolverHandle(PetscDeviceContext, void *) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); } 139a2158755SJunchao Zhang }; 140a2158755SJunchao Zhang 1410e6b6b59SJacob Faibussowitsch } // namespace impl 14217f48955SJacob Faibussowitsch 1430e6b6b59SJacob Faibussowitsch } // namespace sycl 14417f48955SJacob Faibussowitsch 1450e6b6b59SJacob Faibussowitsch } // namespace device 14617f48955SJacob Faibussowitsch 147a2158755SJunchao Zhang } // namespace Petsc 148a2158755SJunchao Zhang 149d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscDeviceContextCreate_SYCL(PetscDeviceContext dctx) 150d71ae5a4SJacob Faibussowitsch { 1510e6b6b59SJacob Faibussowitsch using namespace Petsc::device::sycl::impl; 15217f48955SJacob Faibussowitsch 15317f48955SJacob Faibussowitsch static const DeviceContext syclctx; 154a2158755SJunchao Zhang 155a2158755SJunchao Zhang PetscFunctionBegin; 156ab4ee011SJunchao Zhang PetscCallCXX(dctx->data = new DeviceContext::PetscDeviceContext_SYCL()); 157aea10558SJacob Faibussowitsch dctx->ops[0] = syclctx.ops; 1583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 159a2158755SJunchao Zhang } 160