xref: /petsc/src/sys/objects/kokkos/kinit.kokkos.cxx (revision e907feaad8a9ad15ff003b1a1f8acb2ecb25e843)
1a4af0ceeSJacob Faibussowitsch #include <petsc/private/deviceimpl.h>
2*e907feaaSJunchao Zhang #include <petsc/private/kokkosimpl.hpp>
30e6b6b59SJacob Faibussowitsch #include <petscpkg_version.h>
4524fe776SJunchao Zhang #include <petsc_kokkos.hpp>
5c2b86a48SJunchao Zhang 
// Set to PETSC_TRUE at the end of PetscKokkosInitializeCheck()
645639126SStefano Zampini PetscBool    PetscKokkosInitialized = PETSC_FALSE;
// Buffer obtained with Kokkos::kokkos_malloc() in PetscKokkosInitializeCheck() and released with Kokkos::kokkos_free() in PetscKokkosFinalize_Private()
7*e907feaaSJunchao Zhang PetscScalar *PetscScalarPool        = nullptr;
// Number of PetscScalar entries in PetscScalarPool; 0 until the pool is allocated and again after PetscKokkosFinalize_Private()
8*e907feaaSJunchao Zhang PetscInt     PetscScalarPoolSize    = 0;
945639126SStefano Zampini 
// Execution space instance created by PetscKokkosInitializeCheck() and deleted by PetscKokkosFinalize_Private()
10524fe776SJunchao Zhang Kokkos::DefaultExecutionSpace *PetscKokkosExecutionSpacePtr = nullptr;
11524fe776SJunchao Zhang 
12d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscKokkosFinalize_Private(void)
13d71ae5a4SJacob Faibussowitsch {
14c2b86a48SJunchao Zhang   PetscFunctionBegin;
15524fe776SJunchao Zhang   PetscCallCXX(delete PetscKokkosExecutionSpacePtr);
16*e907feaaSJunchao Zhang   PetscCallCXX(Kokkos::kokkos_free(PetscScalarPool));
17*e907feaaSJunchao Zhang   PetscScalarPoolSize = 0;
18*e907feaaSJunchao Zhang   if (PetscBeganKokkos) {
19*e907feaaSJunchao Zhang     PetscCallCXX(Kokkos::finalize());
20*e907feaaSJunchao Zhang     PetscBeganKokkos = PETSC_FALSE;
21*e907feaaSJunchao Zhang   }
223ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
23c2b86a48SJunchao Zhang }
24c2b86a48SJunchao Zhang 
25d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscKokkosIsInitialized_Private(PetscBool *isInitialized)
26d71ae5a4SJacob Faibussowitsch {
27c2b86a48SJunchao Zhang   PetscFunctionBegin;
28c2b86a48SJunchao Zhang   *isInitialized = Kokkos::is_initialized() ? PETSC_TRUE : PETSC_FALSE;
293ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
30c2b86a48SJunchao Zhang }
31375e5adfSJunchao Zhang 
3234766dafSJunchao Zhang /* Initialize Kokkos if not yet */
33d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscKokkosInitializeCheck(void)
34d71ae5a4SJacob Faibussowitsch {
35375e5adfSJunchao Zhang   PetscFunctionBegin;
3634766dafSJunchao Zhang   if (!Kokkos::is_initialized()) {
37471471fdSJunchao Zhang #if PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0)
38c66e0907SJunchao Zhang     auto args = Kokkos::InitializationSettings();
39c66e0907SJunchao Zhang #else
4062825ce1SJacob Faibussowitsch     auto args             = Kokkos::InitArguments{}; /* use default constructor */
41c66e0907SJunchao Zhang #endif
42b84ac304SJunchao Zhang 
4362825ce1SJacob Faibussowitsch #if (defined(KOKKOS_ENABLE_CUDA) && PetscDefined(HAVE_CUDA)) || (defined(KOKKOS_ENABLE_HIP) && PetscDefined(HAVE_HIP)) || (defined(KOKKOS_ENABLE_SYCL) && PetscDefined(HAVE_SYCL))
4462825ce1SJacob Faibussowitsch     /* Kokkos does not support CUDA and HIP at the same time (but we do :)) */
45ab4ee011SJunchao Zhang     PetscDevice device;
46ab4ee011SJunchao Zhang     PetscInt    deviceId;
47ab4ee011SJunchao Zhang     PetscCall(PetscDeviceCreate(PETSC_DEVICE_DEFAULT(), PETSC_DECIDE, &device));
48ab4ee011SJunchao Zhang     PetscCall(PetscDeviceGetDeviceId(device, &deviceId));
49ab4ee011SJunchao Zhang     PetscCall(PetscDeviceDestroy(&device));
50ab4ee011SJunchao Zhang   #if PETSC_PKG_KOKKOS_VERSION_GE(4, 0, 0)
51ab4ee011SJunchao Zhang     // if device_id is not set, and no gpus have been found, kokkos will use CPU
52ab4ee011SJunchao Zhang     if (deviceId >= 0) args.set_device_id(static_cast<int>(deviceId));
53ab4ee011SJunchao Zhang   #elif PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0)
54ab4ee011SJunchao Zhang     args.set_device_id(static_cast<int>(deviceId));
55c66e0907SJunchao Zhang   #else
56ab4ee011SJunchao Zhang     PetscCall(PetscMPIIntCast(deviceId, &args.device_id));
57375e5adfSJunchao Zhang   #endif
58c66e0907SJunchao Zhang #endif
5911f0be55SJunchao Zhang 
60471471fdSJunchao Zhang #if PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0)
61c66e0907SJunchao Zhang     args.set_disable_warnings(!PetscDefined(HAVE_KOKKOS_INIT_WARNINGS));
62c66e0907SJunchao Zhang #else
6362825ce1SJacob Faibussowitsch     args.disable_warnings = !PetscDefined(HAVE_KOKKOS_INIT_WARNINGS);
64c66e0907SJunchao Zhang #endif
6562825ce1SJacob Faibussowitsch 
6611f0be55SJunchao Zhang     /* To use PetscNumOMPThreads, one has to configure petsc --with-openmp.
6711f0be55SJunchao Zhang        Otherwise, let's keep the default value (-1) of args.num_threads.
6811f0be55SJunchao Zhang     */
6962825ce1SJacob Faibussowitsch #if defined(KOKKOS_ENABLE_OPENMP) && PetscDefined(HAVE_OPENMP)
70471471fdSJunchao Zhang   #if PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0)
71c66e0907SJunchao Zhang     args.set_num_threads(PetscNumOMPThreads);
72c66e0907SJunchao Zhang   #else
7311f0be55SJunchao Zhang     args.num_threads = PetscNumOMPThreads;
7411f0be55SJunchao Zhang   #endif
75c66e0907SJunchao Zhang #endif
76ab4ee011SJunchao Zhang     PetscCallCXX(Kokkos::initialize(args));
779c9deb76SJunchao Zhang     PetscBeganKokkos = PETSC_TRUE;
789c9deb76SJunchao Zhang   }
799c9deb76SJunchao Zhang   if (!PetscKokkosExecutionSpacePtr) { // No matter Kokkos is init'ed by petsc or by user, we need to init PetscKokkosExecutionSpacePtr
80524fe776SJunchao Zhang #if defined(PETSC_HAVE_CUDA)
81524fe776SJunchao Zhang     extern cudaStream_t PetscDefaultCudaStream;
82524fe776SJunchao Zhang     PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace(PetscDefaultCudaStream));
83524fe776SJunchao Zhang #elif defined(PETS_HAVE_HIP)
84524fe776SJunchao Zhang     extern hipStream_t PetscDefaultHipStream;
85524fe776SJunchao Zhang     PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace(PetscDefaultHipStream));
86524fe776SJunchao Zhang #else
87524fe776SJunchao Zhang     PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace());
88524fe776SJunchao Zhang #endif
8959e55d94SJunchao Zhang   }
90*e907feaaSJunchao Zhang   if (!PetscScalarPoolSize) { // A pool for a small count of PetscScalars
91*e907feaaSJunchao Zhang     PetscScalarPoolSize = 1024;
92*e907feaaSJunchao Zhang     PetscCallCXX(PetscScalarPool = static_cast<PetscScalar *>(Kokkos::kokkos_malloc(sizeof(PetscScalar) * PetscScalarPoolSize)));
93*e907feaaSJunchao Zhang   }
94*e907feaaSJunchao Zhang 
95*e907feaaSJunchao Zhang   PetscKokkosInitialized = PETSC_TRUE; // PetscKokkosInitializeCheck() was called
963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
97375e5adfSJunchao Zhang }
98