10e6b6b59SJacob Faibussowitsch #include "sycldevice.hpp" 2a2158755SJunchao Zhang #include <CL/sycl.hpp> 3a2158755SJunchao Zhang 4d71ae5a4SJacob Faibussowitsch namespace Petsc 5d71ae5a4SJacob Faibussowitsch { 6a2158755SJunchao Zhang 7d71ae5a4SJacob Faibussowitsch namespace device 8d71ae5a4SJacob Faibussowitsch { 917f48955SJacob Faibussowitsch 10d71ae5a4SJacob Faibussowitsch namespace sycl 11d71ae5a4SJacob Faibussowitsch { 1217f48955SJacob Faibussowitsch 13d71ae5a4SJacob Faibussowitsch namespace impl 14d71ae5a4SJacob Faibussowitsch { 1517f48955SJacob Faibussowitsch 169371c9d4SSatish Balay class DeviceContext { 17a2158755SJunchao Zhang public: 18a2158755SJunchao Zhang struct PetscDeviceContext_IMPLS { 190e6b6b59SJacob Faibussowitsch ::sycl::event event; 200e6b6b59SJacob Faibussowitsch ::sycl::event begin; // timer-only 210e6b6b59SJacob Faibussowitsch ::sycl::event end; // timer-only 22a2158755SJunchao Zhang #if PetscDefined(USE_DEBUG) 23a2158755SJunchao Zhang PetscBool timerInUse; 24a2158755SJunchao Zhang #endif 25a2158755SJunchao Zhang }; 26a2158755SJunchao Zhang 27a2158755SJunchao Zhang private: 28a2158755SJunchao Zhang static bool initialized_; 29a2158755SJunchao Zhang 30089fb57cSJacob Faibussowitsch static PetscErrorCode finalize_() noexcept 31d71ae5a4SJacob Faibussowitsch { 32a2158755SJunchao Zhang PetscFunctionBegin; 33a2158755SJunchao Zhang initialized_ = false; 343ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 35a2158755SJunchao Zhang } 36a2158755SJunchao Zhang 37089fb57cSJacob Faibussowitsch static PetscErrorCode initialize_(PetscInt id, DeviceContext *dci) noexcept 38d71ae5a4SJacob Faibussowitsch { 39a2158755SJunchao Zhang PetscFunctionBegin; 409566063dSJacob Faibussowitsch PetscCall(PetscDeviceCheckDeviceCount_Internal(id)); 41a2158755SJunchao Zhang if (!initialized_) { 42a2158755SJunchao Zhang initialized_ = true; 439566063dSJacob Faibussowitsch PetscCall(PetscRegisterFinalize(finalize_)); 44a2158755SJunchao Zhang } 453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 46a2158755SJunchao Zhang } 47a2158755SJunchao Zhang 48a2158755SJunchao Zhang public: 493f675fcfSPierre Jolivet const struct _DeviceContextOps ops = {destroy, changeStreamType, setUp, query, waitForContext, synchronize, getBlasHandle, getSolverHandle, getStreamHandle, beginTimer, endTimer, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr}; 50a2158755SJunchao Zhang 51a2158755SJunchao Zhang // default constructor 5217f48955SJacob Faibussowitsch DeviceContext() noexcept = default; 53a2158755SJunchao Zhang 54a2158755SJunchao Zhang // All of these functions MUST be static in order to be callable from C, otherwise they 55a2158755SJunchao Zhang // get the implicit 'this' pointer tacked on 56089fb57cSJacob Faibussowitsch static PetscErrorCode destroy(PetscDeviceContext dctx) noexcept 57d71ae5a4SJacob Faibussowitsch { 58a2158755SJunchao Zhang PetscFunctionBegin; 59a2158755SJunchao Zhang delete static_cast<PetscDeviceContext_IMPLS *>(dctx->data); 60a2158755SJunchao Zhang dctx->data = nullptr; 613ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 62a2158755SJunchao Zhang }; 63*31d47070SJunchao Zhang 64*31d47070SJunchao Zhang static PetscErrorCode setUp(PetscDeviceContext dctx) noexcept 65*31d47070SJunchao Zhang { 66*31d47070SJunchao Zhang PetscFunctionBegin; 67*31d47070SJunchao Zhang #if PetscDefined(USE_DEBUG) 68*31d47070SJunchao Zhang static_cast<PetscDeviceContext_SYCL *>(dctx->data)->timerInUse = PETSC_FALSE; 69*31d47070SJunchao Zhang #endif 70*31d47070SJunchao Zhang // petsc/sycl currently only uses Kokkos's default execution space (and its queue), 71*31d47070SJunchao Zhang // so in some sense, we have only one petsc device context. 72*31d47070SJunchao Zhang PetscCall(PetscKokkosInitializeCheck()); 73*31d47070SJunchao Zhang static_cast<PetscDeviceContext_SYCL *>(dctx->data)->queue = Kokkos::DefaultExecutionSpace().sycl_queue(); 74*31d47070SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 75*31d47070SJunchao Zhang }; 76*31d47070SJunchao Zhang 77*31d47070SJunchao Zhang static PetscErrorCode query(PetscDeviceContext dctx, PetscBool *idle) noexcept 78*31d47070SJunchao Zhang { 79*31d47070SJunchao Zhang PetscFunctionBegin; 80*31d47070SJunchao Zhang // available in future, https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/supported/sycl_ext_oneapi_queue_empty.asciidoc 81*31d47070SJunchao Zhang // *idle = static_cast<PetscDeviceContext_SYCL*>(dctx->data)->queue.empty() ? PETSC_TRUE : PETSC_FALSE; 82*31d47070SJunchao Zhang *idle = PETSC_FALSE; 83*31d47070SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 84*31d47070SJunchao Zhang }; 85*31d47070SJunchao Zhang 86*31d47070SJunchao Zhang static PetscErrorCode synchronize(PetscDeviceContext dctx) noexcept 87*31d47070SJunchao Zhang { 88*31d47070SJunchao Zhang PetscBool idle = PETSC_TRUE; 89*31d47070SJunchao Zhang const auto dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data); 90*31d47070SJunchao Zhang 91*31d47070SJunchao Zhang PetscFunctionBegin; 92*31d47070SJunchao Zhang PetscCall(query(dctx, &idle)); 93*31d47070SJunchao Zhang if (!idle) PetscCallCXX(dci->queue.wait()); 94*31d47070SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 95*31d47070SJunchao Zhang }; 96*31d47070SJunchao Zhang 97*31d47070SJunchao Zhang static PetscErrorCode getStreamHandle(PetscDeviceContext dctx, void *handle) noexcept 98*31d47070SJunchao Zhang { 99*31d47070SJunchao Zhang PetscFunctionBegin; 100*31d47070SJunchao Zhang *static_cast<::sycl::queue **>(handle) = &(static_cast<PetscDeviceContext_SYCL *>(dctx->data)->queue); 101*31d47070SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 102*31d47070SJunchao Zhang }; 103*31d47070SJunchao Zhang 104*31d47070SJunchao Zhang static PetscErrorCode beginTimer(PetscDeviceContext dctx) noexcept 105*31d47070SJunchao Zhang { 106*31d47070SJunchao Zhang const auto dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data); 107*31d47070SJunchao Zhang 108*31d47070SJunchao Zhang PetscFunctionBegin; 109*31d47070SJunchao Zhang #if PetscDefined(USE_DEBUG) 110*31d47070SJunchao Zhang PetscCheck(!dci->timerInUse, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Forgot to call PetscLogGpuTimeEnd()?"); 111*31d47070SJunchao Zhang dci->timerInUse = PETSC_TRUE; 112*31d47070SJunchao Zhang #endif 113*31d47070SJunchao Zhang PetscCallCXX(dci->timeBegin = dci->timer.seconds()); 114*31d47070SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 115*31d47070SJunchao Zhang }; 116*31d47070SJunchao Zhang 117*31d47070SJunchao Zhang static PetscErrorCode endTimer(PetscDeviceContext dctx, PetscLogDouble *elapsed) noexcept 118*31d47070SJunchao Zhang { 119*31d47070SJunchao Zhang const auto dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data); 120*31d47070SJunchao Zhang 121*31d47070SJunchao Zhang PetscFunctionBegin; 122*31d47070SJunchao Zhang #if PetscDefined(USE_DEBUG) 123*31d47070SJunchao Zhang PetscCheck(dci->timerInUse, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Forgot to call PetscLogGpuTimeBegin()?"); 124*31d47070SJunchao Zhang dci->timerInUse = PETSC_FALSE; 125*31d47070SJunchao Zhang #endif 126*31d47070SJunchao Zhang PetscCallCXX(dci->queue.wait()); 127*31d47070SJunchao Zhang PetscCallCXX(*elapsed = dci->timer.seconds() - dci->timeBegin); 128*31d47070SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 129*31d47070SJunchao Zhang }; 130*31d47070SJunchao Zhang 131089fb57cSJacob Faibussowitsch static PetscErrorCode changeStreamType(PetscDeviceContext, PetscStreamType) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); }; 132089fb57cSJacob Faibussowitsch static PetscErrorCode setUp(PetscDeviceContext) noexcept { return PETSC_SUCCESS; }; // Nothing to setup 133089fb57cSJacob Faibussowitsch static PetscErrorCode query(PetscDeviceContext, PetscBool *) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); }; 134089fb57cSJacob Faibussowitsch static PetscErrorCode waitForContext(PetscDeviceContext, PetscDeviceContext) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); }; 135089fb57cSJacob Faibussowitsch static PetscErrorCode synchronize(PetscDeviceContext) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); }; 136089fb57cSJacob Faibussowitsch static PetscErrorCode getBlasHandle(PetscDeviceContext, void *) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); }; 137089fb57cSJacob Faibussowitsch static PetscErrorCode getSolverHandle(PetscDeviceContext, void *) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); }; 138089fb57cSJacob Faibussowitsch static PetscErrorCode getStreamHandle(PetscDeviceContext, void *) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); }; 139089fb57cSJacob Faibussowitsch static PetscErrorCode beginTimer(PetscDeviceContext) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); }; 140089fb57cSJacob Faibussowitsch static PetscErrorCode endTimer(PetscDeviceContext, PetscLogDouble *) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); }; 141a2158755SJunchao Zhang }; 142a2158755SJunchao Zhang 1430e6b6b59SJacob Faibussowitsch } // namespace impl 14417f48955SJacob Faibussowitsch 1450e6b6b59SJacob Faibussowitsch } // namespace sycl 14617f48955SJacob Faibussowitsch 1470e6b6b59SJacob Faibussowitsch } // namespace device 14817f48955SJacob Faibussowitsch 149a2158755SJunchao Zhang } // namespace Petsc 150a2158755SJunchao Zhang 151d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscDeviceContextCreate_SYCL(PetscDeviceContext dctx) 152d71ae5a4SJacob Faibussowitsch { 1530e6b6b59SJacob Faibussowitsch using namespace Petsc::device::sycl::impl; 15417f48955SJacob Faibussowitsch 15517f48955SJacob Faibussowitsch static const DeviceContext syclctx; 156a2158755SJunchao Zhang 157a2158755SJunchao Zhang PetscFunctionBegin; 15817f48955SJacob Faibussowitsch dctx->data = new DeviceContext::PetscDeviceContext_IMPLS(); 1599566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(dctx->ops, &syclctx.ops, sizeof(syclctx.ops))); 1603ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 161a2158755SJunchao Zhang } 162