10e6b6b59SJacob Faibussowitsch #include "sycldevice.hpp" 245a61cdeSJunchao Zhang #include <sycl/sycl.hpp> 3*7a4760caSJunchao Zhang #include <chrono> 4a2158755SJunchao Zhang 5d71ae5a4SJacob Faibussowitsch namespace Petsc 6d71ae5a4SJacob Faibussowitsch { 7a2158755SJunchao Zhang 8d71ae5a4SJacob Faibussowitsch namespace device 9d71ae5a4SJacob Faibussowitsch { 1017f48955SJacob Faibussowitsch 11d71ae5a4SJacob Faibussowitsch namespace sycl 12d71ae5a4SJacob Faibussowitsch { 1317f48955SJacob Faibussowitsch 14d71ae5a4SJacob Faibussowitsch namespace impl 15d71ae5a4SJacob Faibussowitsch { 1617f48955SJacob Faibussowitsch 179371c9d4SSatish Balay class DeviceContext { 18a2158755SJunchao Zhang public: 19ab4ee011SJunchao Zhang struct PetscDeviceContext_SYCL { 200e6b6b59SJacob Faibussowitsch ::sycl::event event; 210e6b6b59SJacob Faibussowitsch ::sycl::event begin; // timer-only 220e6b6b59SJacob Faibussowitsch ::sycl::event end; // timer-only 23a2158755SJunchao Zhang #if PetscDefined(USE_DEBUG) 24ab4ee011SJunchao Zhang PetscBool timerInUse{}; 25a2158755SJunchao Zhang #endif 26ab4ee011SJunchao Zhang ::sycl::queue queue; 27*7a4760caSJunchao Zhang 28*7a4760caSJunchao Zhang std::chrono::time_point<std::chrono::steady_clock> timeBegin{}; 29a2158755SJunchao Zhang }; 30a2158755SJunchao Zhang 31a2158755SJunchao Zhang private: 32a2158755SJunchao Zhang static bool initialized_; 33a2158755SJunchao Zhang 34089fb57cSJacob Faibussowitsch static PetscErrorCode finalize_() noexcept 35d71ae5a4SJacob Faibussowitsch { 36a2158755SJunchao Zhang PetscFunctionBegin; 37a2158755SJunchao Zhang initialized_ = false; 383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 39a2158755SJunchao Zhang } 40a2158755SJunchao Zhang 41ab4ee011SJunchao Zhang static PetscErrorCode initialize_(PetscInt id, PetscDeviceContext dctx) noexcept 42d71ae5a4SJacob Faibussowitsch { 43a2158755SJunchao Zhang PetscFunctionBegin; 449566063dSJacob Faibussowitsch PetscCall(PetscDeviceCheckDeviceCount_Internal(id)); 45a2158755SJunchao Zhang if (!initialized_) { 46a2158755SJunchao Zhang initialized_ = true; 479566063dSJacob Faibussowitsch PetscCall(PetscRegisterFinalize(finalize_)); 48a2158755SJunchao Zhang } 493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 50a2158755SJunchao Zhang } 51a2158755SJunchao Zhang 52a2158755SJunchao Zhang public: 533f675fcfSPierre Jolivet const struct _DeviceContextOps ops = {destroy, changeStreamType, setUp, query, waitForContext, synchronize, getBlasHandle, getSolverHandle, getStreamHandle, beginTimer, endTimer, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr}; 54a2158755SJunchao Zhang 55a2158755SJunchao Zhang // default constructor 5617f48955SJacob Faibussowitsch DeviceContext() noexcept = default; 57a2158755SJunchao Zhang 58a2158755SJunchao Zhang // All of these functions MUST be static in order to be callable from C, otherwise they 59a2158755SJunchao Zhang // get the implicit 'this' pointer tacked on 60089fb57cSJacob Faibussowitsch static PetscErrorCode destroy(PetscDeviceContext dctx) noexcept 61d71ae5a4SJacob Faibussowitsch { 62a2158755SJunchao Zhang PetscFunctionBegin; 63ab4ee011SJunchao Zhang delete static_cast<PetscDeviceContext_SYCL *>(dctx->data); 64a2158755SJunchao Zhang dctx->data = nullptr; 653ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 66f4f49eeaSPierre Jolivet } 6731d47070SJunchao Zhang 6831d47070SJunchao Zhang static PetscErrorCode setUp(PetscDeviceContext dctx) noexcept 6931d47070SJunchao Zhang { 70*7a4760caSJunchao Zhang PetscDevice dev; 71*7a4760caSJunchao Zhang PetscInt id; 72*7a4760caSJunchao Zhang 7331d47070SJunchao Zhang PetscFunctionBegin; 7431d47070SJunchao Zhang #if PetscDefined(USE_DEBUG) 7531d47070SJunchao Zhang static_cast<PetscDeviceContext_SYCL *>(dctx->data)->timerInUse = PETSC_FALSE; 7631d47070SJunchao Zhang #endif 77*7a4760caSJunchao Zhang PetscCall(PetscDeviceContextGetDevice(dctx, &dev)); 78*7a4760caSJunchao Zhang PetscCall(PetscDeviceGetDeviceId(dev, &id)); 79*7a4760caSJunchao Zhang const ::sycl::device &syclDevice = (id == PETSC_SYCL_DEVICE_HOST) ? ::sycl::device(::sycl::cpu_selector_v) : ::sycl::device::get_devices(::sycl::info::device_type::gpu)[id]; 80*7a4760caSJunchao Zhang 81*7a4760caSJunchao Zhang static_cast<PetscDeviceContext_SYCL *>(dctx->data)->queue = ::sycl::queue(syclDevice, ::sycl::property::queue::in_order()); 8231d47070SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 83f4f49eeaSPierre Jolivet } 8431d47070SJunchao Zhang 8531d47070SJunchao Zhang static PetscErrorCode query(PetscDeviceContext dctx, PetscBool *idle) noexcept 8631d47070SJunchao Zhang { 8731d47070SJunchao Zhang PetscFunctionBegin; 8831d47070SJunchao Zhang // available in future, https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/supported/sycl_ext_oneapi_queue_empty.asciidoc 8931d47070SJunchao Zhang // *idle = static_cast<PetscDeviceContext_SYCL*>(dctx->data)->queue.empty() ? PETSC_TRUE : PETSC_FALSE; 9031d47070SJunchao Zhang *idle = PETSC_FALSE; 9131d47070SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 92f4f49eeaSPierre Jolivet } 9331d47070SJunchao Zhang 9431d47070SJunchao Zhang static PetscErrorCode synchronize(PetscDeviceContext dctx) noexcept 9531d47070SJunchao Zhang { 9631d47070SJunchao Zhang PetscBool idle = PETSC_TRUE; 9731d47070SJunchao Zhang const auto dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data); 9831d47070SJunchao Zhang 9931d47070SJunchao Zhang PetscFunctionBegin; 10031d47070SJunchao Zhang PetscCall(query(dctx, &idle)); 10131d47070SJunchao Zhang if (!idle) PetscCallCXX(dci->queue.wait()); 10231d47070SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 103f4f49eeaSPierre Jolivet } 10431d47070SJunchao Zhang 10597cd0981SJacob Faibussowitsch static PetscErrorCode getStreamHandle(PetscDeviceContext dctx, void **handle) noexcept 10631d47070SJunchao Zhang { 10731d47070SJunchao Zhang PetscFunctionBegin; 10897cd0981SJacob Faibussowitsch *reinterpret_cast<::sycl::queue **>(handle) = &(static_cast<PetscDeviceContext_SYCL *>(dctx->data)->queue); 10931d47070SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 110f4f49eeaSPierre Jolivet } 11131d47070SJunchao Zhang 11231d47070SJunchao Zhang static PetscErrorCode beginTimer(PetscDeviceContext dctx) noexcept 11331d47070SJunchao Zhang { 11431d47070SJunchao Zhang const auto dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data); 11531d47070SJunchao Zhang 11631d47070SJunchao Zhang PetscFunctionBegin; 11731d47070SJunchao Zhang #if PetscDefined(USE_DEBUG) 11831d47070SJunchao Zhang PetscCheck(!dci->timerInUse, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Forgot to call PetscLogGpuTimeEnd()?"); 11931d47070SJunchao Zhang dci->timerInUse = PETSC_TRUE; 12031d47070SJunchao Zhang #endif 121*7a4760caSJunchao Zhang // It is not a good approach to time SYCL kernels because the timer starts at the kernel launch time at host, 122*7a4760caSJunchao Zhang // not at the start of execution time on device. SYCL provides this style of kernel timing: 123*7a4760caSJunchao Zhang /* 124*7a4760caSJunchao Zhang sycl::queue q(sycl::default_selector_v, sycl::property::queue::enable_profiling{}); 125*7a4760caSJunchao Zhang sycl::event e = q.submit([&](sycl::handler &h) { 126*7a4760caSJunchao Zhang ... 127*7a4760caSJunchao Zhang }); 128*7a4760caSJunchao Zhang e.wait(); 129*7a4760caSJunchao Zhang auto start_time = e.get_profiling_info<sycl::info::event_profiling::command_start>(); 130*7a4760caSJunchao Zhang auto end_time = e.get_profiling_info<sycl::info::event_profiling::command_end>(); 131*7a4760caSJunchao Zhang long long kernel_duration_ns = end_time - start_time; 132*7a4760caSJunchao Zhang */ 133*7a4760caSJunchao Zhang // It requires 1) enable profiling at the queue's creation time, and 2) store the event returned by kernel launch. 134*7a4760caSJunchao Zhang // But neither we have control of the input queue, nor does PetscDeviceContext support 2), so we just use a 135*7a4760caSJunchao Zhang // host side timer. 136*7a4760caSJunchao Zhang PetscCallCXX(dci->timeBegin = std::chrono::steady_clock::now()); 13731d47070SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 138f4f49eeaSPierre Jolivet } 13931d47070SJunchao Zhang 14031d47070SJunchao Zhang static PetscErrorCode endTimer(PetscDeviceContext dctx, PetscLogDouble *elapsed) noexcept 14131d47070SJunchao Zhang { 14231d47070SJunchao Zhang const auto dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data); 143*7a4760caSJunchao Zhang std::chrono::duration<double> duration; 14431d47070SJunchao Zhang 14531d47070SJunchao Zhang PetscFunctionBegin; 14631d47070SJunchao Zhang #if PetscDefined(USE_DEBUG) 14731d47070SJunchao Zhang PetscCheck(dci->timerInUse, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Forgot to call PetscLogGpuTimeBegin()?"); 14831d47070SJunchao Zhang dci->timerInUse = PETSC_FALSE; 14931d47070SJunchao Zhang #endif 15031d47070SJunchao Zhang PetscCallCXX(dci->queue.wait()); 151*7a4760caSJunchao Zhang PetscCallCXX(duration = std::chrono::steady_clock::now() - dci->timeBegin); 152*7a4760caSJunchao Zhang PetscCallCXX(*elapsed = duration.count()); 15331d47070SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 154f4f49eeaSPierre Jolivet } 15531d47070SJunchao Zhang 156f4f49eeaSPierre Jolivet static PetscErrorCode changeStreamType(PetscDeviceContext, PetscStreamType) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); } 157f4f49eeaSPierre Jolivet static PetscErrorCode waitForContext(PetscDeviceContext, PetscDeviceContext) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); } 158f4f49eeaSPierre Jolivet static PetscErrorCode getBlasHandle(PetscDeviceContext, void *) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); } 159f4f49eeaSPierre Jolivet static PetscErrorCode getSolverHandle(PetscDeviceContext, void *) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); } 160a2158755SJunchao Zhang }; 161a2158755SJunchao Zhang 1620e6b6b59SJacob Faibussowitsch } // namespace impl 16317f48955SJacob Faibussowitsch 1640e6b6b59SJacob Faibussowitsch } // namespace sycl 16517f48955SJacob Faibussowitsch 1660e6b6b59SJacob Faibussowitsch } // namespace device 16717f48955SJacob Faibussowitsch 168a2158755SJunchao Zhang } // namespace Petsc 169a2158755SJunchao Zhang 170d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscDeviceContextCreate_SYCL(PetscDeviceContext dctx) 171d71ae5a4SJacob Faibussowitsch { 1720e6b6b59SJacob Faibussowitsch using namespace Petsc::device::sycl::impl; 17317f48955SJacob Faibussowitsch 17417f48955SJacob Faibussowitsch static const DeviceContext syclctx; 175a2158755SJunchao Zhang 176a2158755SJunchao Zhang PetscFunctionBegin; 177ab4ee011SJunchao Zhang PetscCallCXX(dctx->data = new DeviceContext::PetscDeviceContext_SYCL()); 178aea10558SJacob Faibussowitsch dctx->ops[0] = syclctx.ops; 1793ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 180a2158755SJunchao Zhang } 181