1a4af0ceeSJacob Faibussowitsch #include <petsc/private/deviceimpl.h> 2e907feaaSJunchao Zhang #include <petsc/private/kokkosimpl.hpp> 30e6b6b59SJacob Faibussowitsch #include <petscpkg_version.h> 4524fe776SJunchao Zhang #include <petsc_kokkos.hpp> 5c9903f8fSJunchao Zhang #include <petscdevice_cupm.h> 6c2b86a48SJunchao Zhang 7f0b74427SPierre Jolivet PetscBool PetscKokkosInitialized = PETSC_FALSE; // Has Kokkos been initialized (either by PETSc or by users)? 8e907feaaSJunchao Zhang PetscScalar *PetscScalarPool = nullptr; 9e907feaaSJunchao Zhang PetscInt PetscScalarPoolSize = 0; 1045639126SStefano Zampini 11524fe776SJunchao Zhang Kokkos::DefaultExecutionSpace *PetscKokkosExecutionSpacePtr = nullptr; 12524fe776SJunchao Zhang 13d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscKokkosFinalize_Private(void) 14d71ae5a4SJacob Faibussowitsch { 15c2b86a48SJunchao Zhang PetscFunctionBegin; 16524fe776SJunchao Zhang PetscCallCXX(delete PetscKokkosExecutionSpacePtr); 17fde95f08SJunchao Zhang PetscKokkosExecutionSpacePtr = nullptr; 18e907feaaSJunchao Zhang PetscCallCXX(Kokkos::kokkos_free(PetscScalarPool)); 19e907feaaSJunchao Zhang PetscScalarPoolSize = 0; 20e907feaaSJunchao Zhang if (PetscBeganKokkos) { 21e907feaaSJunchao Zhang PetscCallCXX(Kokkos::finalize()); 22e907feaaSJunchao Zhang PetscBeganKokkos = PETSC_FALSE; 23e907feaaSJunchao Zhang } 243ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25c2b86a48SJunchao Zhang } 26c2b86a48SJunchao Zhang 27d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscKokkosIsInitialized_Private(PetscBool *isInitialized) 28d71ae5a4SJacob Faibussowitsch { 29c2b86a48SJunchao Zhang PetscFunctionBegin; 30c2b86a48SJunchao Zhang *isInitialized = Kokkos::is_initialized() ? PETSC_TRUE : PETSC_FALSE; 313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 32c2b86a48SJunchao Zhang } 33375e5adfSJunchao Zhang 3434766dafSJunchao Zhang /* Initialize Kokkos if not yet */ 35d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscKokkosInitializeCheck(void) 36d71ae5a4SJacob Faibussowitsch { 37375e5adfSJunchao Zhang PetscFunctionBegin; 3834766dafSJunchao Zhang if (!Kokkos::is_initialized()) { 39471471fdSJunchao Zhang #if PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0) 40c66e0907SJunchao Zhang auto args = Kokkos::InitializationSettings(); 41c66e0907SJunchao Zhang #else 4262825ce1SJacob Faibussowitsch auto args = Kokkos::InitArguments{}; /* use default constructor */ 43c66e0907SJunchao Zhang #endif 44b84ac304SJunchao Zhang 45c9903f8fSJunchao Zhang #if (defined(KOKKOS_ENABLE_CUDA) && defined(PETSC_HAVE_CUDA)) || (defined(KOKKOS_ENABLE_HIP) && defined(PETSC_HAVE_HIP)) || (defined(KOKKOS_ENABLE_SYCL) && defined(PETSC_HAVE_SYCL)) 4662825ce1SJacob Faibussowitsch /* Kokkos does not support CUDA and HIP at the same time (but we do :)) */ 47ab4ee011SJunchao Zhang PetscDevice device; 48ab4ee011SJunchao Zhang PetscInt deviceId; 49ab4ee011SJunchao Zhang PetscCall(PetscDeviceCreate(PETSC_DEVICE_DEFAULT(), PETSC_DECIDE, &device)); 50ab4ee011SJunchao Zhang PetscCall(PetscDeviceGetDeviceId(device, &deviceId)); 51ab4ee011SJunchao Zhang PetscCall(PetscDeviceDestroy(&device)); 52ab4ee011SJunchao Zhang #if PETSC_PKG_KOKKOS_VERSION_GE(4, 0, 0) 53ab4ee011SJunchao Zhang // if device_id is not set, and no gpus have been found, kokkos will use CPU 54ab4ee011SJunchao Zhang if (deviceId >= 0) args.set_device_id(static_cast<int>(deviceId)); 55ab4ee011SJunchao Zhang #elif PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0) 56ab4ee011SJunchao Zhang args.set_device_id(static_cast<int>(deviceId)); 57c66e0907SJunchao Zhang #else 58ab4ee011SJunchao Zhang PetscCall(PetscMPIIntCast(deviceId, &args.device_id)); 59375e5adfSJunchao Zhang #endif 60c66e0907SJunchao Zhang #endif 6111f0be55SJunchao Zhang 62f0b74427SPierre Jolivet /* To use PetscNumOMPThreads, one has to configure PETSc --with-openmp. 6311f0be55SJunchao Zhang Otherwise, let's keep the default value (-1) of args.num_threads. 6411f0be55SJunchao Zhang */ 6562825ce1SJacob Faibussowitsch #if defined(KOKKOS_ENABLE_OPENMP) && PetscDefined(HAVE_OPENMP) 66471471fdSJunchao Zhang #if PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0) 67c66e0907SJunchao Zhang args.set_num_threads(PetscNumOMPThreads); 68c66e0907SJunchao Zhang #else 6911f0be55SJunchao Zhang args.num_threads = PetscNumOMPThreads; 7011f0be55SJunchao Zhang #endif 71c66e0907SJunchao Zhang #endif 72ab4ee011SJunchao Zhang PetscCallCXX(Kokkos::initialize(args)); 739c9deb76SJunchao Zhang PetscBeganKokkos = PETSC_TRUE; 749c9deb76SJunchao Zhang } 75c9903f8fSJunchao Zhang 76f0b74427SPierre Jolivet if (!PetscKokkosExecutionSpacePtr) { // No matter Kokkos is init'ed by PETSc or by user, we need to init PetscKokkosExecutionSpacePtr 77*7a4760caSJunchao Zhang #if (defined(KOKKOS_ENABLE_CUDA) && defined(PETSC_HAVE_CUDA)) || (defined(KOKKOS_ENABLE_HIP) && defined(PETSC_HAVE_HIP)) || (defined(KOKKOS_ENABLE_SYCL) && defined(PETSC_HAVE_SYCL)) 78c9903f8fSJunchao Zhang PetscDeviceContext dctx; 79c9903f8fSJunchao Zhang PetscDeviceType dtype; 80c9903f8fSJunchao Zhang 81a9949e74SJunchao Zhang PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); // it internally sets PetscDefaultCuda/HipStream 82a9949e74SJunchao Zhang PetscCall(PetscDeviceContextGetDeviceType(dctx, &dtype)); 83c9903f8fSJunchao Zhang 84524fe776SJunchao Zhang #if defined(PETSC_HAVE_CUDA) 85c9903f8fSJunchao Zhang if (dtype == PETSC_DEVICE_CUDA) PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace(PetscDefaultCudaStream)); 86c9903f8fSJunchao Zhang #elif defined(PETSC_HAVE_HIP) 87c9903f8fSJunchao Zhang if (dtype == PETSC_DEVICE_HIP) PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace(PetscDefaultHipStream)); 88*7a4760caSJunchao Zhang #elif defined(PETSC_HAVE_SYCL) 89*7a4760caSJunchao Zhang if (dtype == PETSC_DEVICE_SYCL) { 90*7a4760caSJunchao Zhang void *handle; 91*7a4760caSJunchao Zhang PetscCall(PetscDeviceContextGetStreamHandle(dctx, &handle)); // Kind of PetscDefaultSyclStream 92*7a4760caSJunchao Zhang PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace(*(sycl::queue *)handle)); 93*7a4760caSJunchao Zhang } 94c9903f8fSJunchao Zhang #endif 95524fe776SJunchao Zhang #else 96c9903f8fSJunchao Zhang // In all other cases, we use Kokkos default 97524fe776SJunchao Zhang PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace()); 98524fe776SJunchao Zhang #endif 9959e55d94SJunchao Zhang } 100c9903f8fSJunchao Zhang 101e907feaaSJunchao Zhang if (!PetscScalarPoolSize) { // A pool for a small count of PetscScalars 102e907feaaSJunchao Zhang PetscScalarPoolSize = 1024; 103e907feaaSJunchao Zhang PetscCallCXX(PetscScalarPool = static_cast<PetscScalar *>(Kokkos::kokkos_malloc(sizeof(PetscScalar) * PetscScalarPoolSize))); 104e907feaaSJunchao Zhang } 105e907feaaSJunchao Zhang 106e907feaaSJunchao Zhang PetscKokkosInitialized = PETSC_TRUE; // PetscKokkosInitializeCheck() was called 1073ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 108375e5adfSJunchao Zhang } 109