1a4af0ceeSJacob Faibussowitsch #include <petsc/private/deviceimpl.h> 2*e907feaaSJunchao Zhang #include <petsc/private/kokkosimpl.hpp> 30e6b6b59SJacob Faibussowitsch #include <petscpkg_version.h> 4524fe776SJunchao Zhang #include <petsc_kokkos.hpp> 5c2b86a48SJunchao Zhang 645639126SStefano Zampini PetscBool PetscKokkosInitialized = PETSC_FALSE; 7*e907feaaSJunchao Zhang PetscScalar *PetscScalarPool = nullptr; 8*e907feaaSJunchao Zhang PetscInt PetscScalarPoolSize = 0; 945639126SStefano Zampini 10524fe776SJunchao Zhang Kokkos::DefaultExecutionSpace *PetscKokkosExecutionSpacePtr = nullptr; 11524fe776SJunchao Zhang 12d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscKokkosFinalize_Private(void) 13d71ae5a4SJacob Faibussowitsch { 14c2b86a48SJunchao Zhang PetscFunctionBegin; 15524fe776SJunchao Zhang PetscCallCXX(delete PetscKokkosExecutionSpacePtr); 16*e907feaaSJunchao Zhang PetscCallCXX(Kokkos::kokkos_free(PetscScalarPool)); 17*e907feaaSJunchao Zhang PetscScalarPoolSize = 0; 18*e907feaaSJunchao Zhang if (PetscBeganKokkos) { 19*e907feaaSJunchao Zhang PetscCallCXX(Kokkos::finalize()); 20*e907feaaSJunchao Zhang PetscBeganKokkos = PETSC_FALSE; 21*e907feaaSJunchao Zhang } 223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 23c2b86a48SJunchao Zhang } 24c2b86a48SJunchao Zhang 25d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscKokkosIsInitialized_Private(PetscBool *isInitialized) 26d71ae5a4SJacob Faibussowitsch { 27c2b86a48SJunchao Zhang PetscFunctionBegin; 28c2b86a48SJunchao Zhang *isInitialized = Kokkos::is_initialized() ? PETSC_TRUE : PETSC_FALSE; 293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 30c2b86a48SJunchao Zhang } 31375e5adfSJunchao Zhang 3234766dafSJunchao Zhang /* Initialize Kokkos if not yet */ 33d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscKokkosInitializeCheck(void) 34d71ae5a4SJacob Faibussowitsch { 35375e5adfSJunchao Zhang PetscFunctionBegin; 3634766dafSJunchao Zhang if (!Kokkos::is_initialized()) { 37471471fdSJunchao Zhang #if PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0) 38c66e0907SJunchao Zhang auto args = Kokkos::InitializationSettings(); 39c66e0907SJunchao Zhang #else 4062825ce1SJacob Faibussowitsch auto args = Kokkos::InitArguments{}; /* use default constructor */ 41c66e0907SJunchao Zhang #endif 42b84ac304SJunchao Zhang 4362825ce1SJacob Faibussowitsch #if (defined(KOKKOS_ENABLE_CUDA) && PetscDefined(HAVE_CUDA)) || (defined(KOKKOS_ENABLE_HIP) && PetscDefined(HAVE_HIP)) || (defined(KOKKOS_ENABLE_SYCL) && PetscDefined(HAVE_SYCL)) 4462825ce1SJacob Faibussowitsch /* Kokkos does not support CUDA and HIP at the same time (but we do :)) */ 45ab4ee011SJunchao Zhang PetscDevice device; 46ab4ee011SJunchao Zhang PetscInt deviceId; 47ab4ee011SJunchao Zhang PetscCall(PetscDeviceCreate(PETSC_DEVICE_DEFAULT(), PETSC_DECIDE, &device)); 48ab4ee011SJunchao Zhang PetscCall(PetscDeviceGetDeviceId(device, &deviceId)); 49ab4ee011SJunchao Zhang PetscCall(PetscDeviceDestroy(&device)); 50ab4ee011SJunchao Zhang #if PETSC_PKG_KOKKOS_VERSION_GE(4, 0, 0) 51ab4ee011SJunchao Zhang // if device_id is not set, and no gpus have been found, kokkos will use CPU 52ab4ee011SJunchao Zhang if (deviceId >= 0) args.set_device_id(static_cast<int>(deviceId)); 53ab4ee011SJunchao Zhang #elif PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0) 54ab4ee011SJunchao Zhang args.set_device_id(static_cast<int>(deviceId)); 55c66e0907SJunchao Zhang #else 56ab4ee011SJunchao Zhang PetscCall(PetscMPIIntCast(deviceId, &args.device_id)); 57375e5adfSJunchao Zhang #endif 58c66e0907SJunchao Zhang #endif 5911f0be55SJunchao Zhang 60471471fdSJunchao Zhang #if PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0) 61c66e0907SJunchao Zhang args.set_disable_warnings(!PetscDefined(HAVE_KOKKOS_INIT_WARNINGS)); 62c66e0907SJunchao Zhang #else 6362825ce1SJacob Faibussowitsch args.disable_warnings = !PetscDefined(HAVE_KOKKOS_INIT_WARNINGS); 64c66e0907SJunchao Zhang #endif 6562825ce1SJacob Faibussowitsch 6611f0be55SJunchao Zhang /* To use PetscNumOMPThreads, one has to configure petsc --with-openmp. 6711f0be55SJunchao Zhang Otherwise, let's keep the default value (-1) of args.num_threads. 6811f0be55SJunchao Zhang */ 6962825ce1SJacob Faibussowitsch #if defined(KOKKOS_ENABLE_OPENMP) && PetscDefined(HAVE_OPENMP) 70471471fdSJunchao Zhang #if PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0) 71c66e0907SJunchao Zhang args.set_num_threads(PetscNumOMPThreads); 72c66e0907SJunchao Zhang #else 7311f0be55SJunchao Zhang args.num_threads = PetscNumOMPThreads; 7411f0be55SJunchao Zhang #endif 75c66e0907SJunchao Zhang #endif 76ab4ee011SJunchao Zhang PetscCallCXX(Kokkos::initialize(args)); 779c9deb76SJunchao Zhang PetscBeganKokkos = PETSC_TRUE; 789c9deb76SJunchao Zhang } 799c9deb76SJunchao Zhang if (!PetscKokkosExecutionSpacePtr) { // No matter Kokkos is init'ed by petsc or by user, we need to init PetscKokkosExecutionSpacePtr 80524fe776SJunchao Zhang #if defined(PETSC_HAVE_CUDA) 81524fe776SJunchao Zhang extern cudaStream_t PetscDefaultCudaStream; 82524fe776SJunchao Zhang PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace(PetscDefaultCudaStream)); 83524fe776SJunchao Zhang #elif defined(PETS_HAVE_HIP) 84524fe776SJunchao Zhang extern hipStream_t PetscDefaultHipStream; 85524fe776SJunchao Zhang PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace(PetscDefaultHipStream)); 86524fe776SJunchao Zhang #else 87524fe776SJunchao Zhang PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace()); 88524fe776SJunchao Zhang #endif 8959e55d94SJunchao Zhang } 90*e907feaaSJunchao Zhang if (!PetscScalarPoolSize) { // A pool for a small count of PetscScalars 91*e907feaaSJunchao Zhang PetscScalarPoolSize = 1024; 92*e907feaaSJunchao Zhang PetscCallCXX(PetscScalarPool = static_cast<PetscScalar *>(Kokkos::kokkos_malloc(sizeof(PetscScalar) * PetscScalarPoolSize))); 93*e907feaaSJunchao Zhang } 94*e907feaaSJunchao Zhang 95*e907feaaSJunchao Zhang PetscKokkosInitialized = PETSC_TRUE; // PetscKokkosInitializeCheck() was called 963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 97375e5adfSJunchao Zhang } 98