xref: /petsc/src/sys/objects/kokkos/kinit.kokkos.cxx (revision c9903f8f7d70b7888be5f715d81c083b4e1caa44)
1a4af0ceeSJacob Faibussowitsch #include <petsc/private/deviceimpl.h>
2e907feaaSJunchao Zhang #include <petsc/private/kokkosimpl.hpp>
30e6b6b59SJacob Faibussowitsch #include <petscpkg_version.h>
4524fe776SJunchao Zhang #include <petsc_kokkos.hpp>
5*c9903f8fSJunchao Zhang #include <petscdevice_cupm.h>
6c2b86a48SJunchao Zhang 
7*c9903f8fSJunchao Zhang PetscBool    PetscKokkosInitialized = PETSC_FALSE; // Has Kokkos been initialized (either by petsc or by users)?
8e907feaaSJunchao Zhang PetscScalar *PetscScalarPool        = nullptr;
9e907feaaSJunchao Zhang PetscInt     PetscScalarPoolSize    = 0;
1045639126SStefano Zampini 
11524fe776SJunchao Zhang Kokkos::DefaultExecutionSpace *PetscKokkosExecutionSpacePtr = nullptr;
12524fe776SJunchao Zhang 
13d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscKokkosFinalize_Private(void)
14d71ae5a4SJacob Faibussowitsch {
15c2b86a48SJunchao Zhang   PetscFunctionBegin;
16524fe776SJunchao Zhang   PetscCallCXX(delete PetscKokkosExecutionSpacePtr);
17e907feaaSJunchao Zhang   PetscCallCXX(Kokkos::kokkos_free(PetscScalarPool));
18e907feaaSJunchao Zhang   PetscScalarPoolSize = 0;
19e907feaaSJunchao Zhang   if (PetscBeganKokkos) {
20e907feaaSJunchao Zhang     PetscCallCXX(Kokkos::finalize());
21e907feaaSJunchao Zhang     PetscBeganKokkos = PETSC_FALSE;
22e907feaaSJunchao Zhang   }
233ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
24c2b86a48SJunchao Zhang }
25c2b86a48SJunchao Zhang 
26d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscKokkosIsInitialized_Private(PetscBool *isInitialized)
27d71ae5a4SJacob Faibussowitsch {
28c2b86a48SJunchao Zhang   PetscFunctionBegin;
29c2b86a48SJunchao Zhang   *isInitialized = Kokkos::is_initialized() ? PETSC_TRUE : PETSC_FALSE;
303ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
31c2b86a48SJunchao Zhang }
32375e5adfSJunchao Zhang 
3334766dafSJunchao Zhang /* Initialize Kokkos if not yet */
34d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscKokkosInitializeCheck(void)
35d71ae5a4SJacob Faibussowitsch {
36375e5adfSJunchao Zhang   PetscFunctionBegin;
3734766dafSJunchao Zhang   if (!Kokkos::is_initialized()) {
38471471fdSJunchao Zhang #if PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0)
39c66e0907SJunchao Zhang     auto args = Kokkos::InitializationSettings();
40c66e0907SJunchao Zhang #else
4162825ce1SJacob Faibussowitsch     auto args = Kokkos::InitArguments{}; /* use default constructor */
42c66e0907SJunchao Zhang #endif
43b84ac304SJunchao Zhang 
44*c9903f8fSJunchao Zhang #if (defined(KOKKOS_ENABLE_CUDA) && defined(PETSC_HAVE_CUDA)) || (defined(KOKKOS_ENABLE_HIP) && defined(PETSC_HAVE_HIP)) || (defined(KOKKOS_ENABLE_SYCL) && defined(PETSC_HAVE_SYCL))
4562825ce1SJacob Faibussowitsch     /* Kokkos does not support CUDA and HIP at the same time (but we do :)) */
46ab4ee011SJunchao Zhang     PetscDevice device;
47ab4ee011SJunchao Zhang     PetscInt    deviceId;
48ab4ee011SJunchao Zhang     PetscCall(PetscDeviceCreate(PETSC_DEVICE_DEFAULT(), PETSC_DECIDE, &device));
49ab4ee011SJunchao Zhang     PetscCall(PetscDeviceGetDeviceId(device, &deviceId));
50ab4ee011SJunchao Zhang     PetscCall(PetscDeviceDestroy(&device));
51ab4ee011SJunchao Zhang   #if PETSC_PKG_KOKKOS_VERSION_GE(4, 0, 0)
52ab4ee011SJunchao Zhang     // if device_id is not set, and no gpus have been found, kokkos will use CPU
53ab4ee011SJunchao Zhang     if (deviceId >= 0) args.set_device_id(static_cast<int>(deviceId));
54ab4ee011SJunchao Zhang   #elif PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0)
55ab4ee011SJunchao Zhang     args.set_device_id(static_cast<int>(deviceId));
56c66e0907SJunchao Zhang   #else
57ab4ee011SJunchao Zhang     PetscCall(PetscMPIIntCast(deviceId, &args.device_id));
58375e5adfSJunchao Zhang   #endif
59c66e0907SJunchao Zhang #endif
6011f0be55SJunchao Zhang 
61471471fdSJunchao Zhang #if PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0)
62c66e0907SJunchao Zhang     args.set_disable_warnings(!PetscDefined(HAVE_KOKKOS_INIT_WARNINGS));
63c66e0907SJunchao Zhang #else
6462825ce1SJacob Faibussowitsch     args.disable_warnings = !PetscDefined(HAVE_KOKKOS_INIT_WARNINGS);
65c66e0907SJunchao Zhang #endif
6662825ce1SJacob Faibussowitsch 
6711f0be55SJunchao Zhang     /* To use PetscNumOMPThreads, one has to configure petsc --with-openmp.
6811f0be55SJunchao Zhang        Otherwise, let's keep the default value (-1) of args.num_threads.
6911f0be55SJunchao Zhang     */
7062825ce1SJacob Faibussowitsch #if defined(KOKKOS_ENABLE_OPENMP) && PetscDefined(HAVE_OPENMP)
71471471fdSJunchao Zhang   #if PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0)
72c66e0907SJunchao Zhang     args.set_num_threads(PetscNumOMPThreads);
73c66e0907SJunchao Zhang   #else
7411f0be55SJunchao Zhang     args.num_threads = PetscNumOMPThreads;
7511f0be55SJunchao Zhang   #endif
76c66e0907SJunchao Zhang #endif
77ab4ee011SJunchao Zhang     PetscCallCXX(Kokkos::initialize(args));
789c9deb76SJunchao Zhang     PetscBeganKokkos = PETSC_TRUE;
799c9deb76SJunchao Zhang   }
80*c9903f8fSJunchao Zhang 
819c9deb76SJunchao Zhang   if (!PetscKokkosExecutionSpacePtr) { // No matter Kokkos is init'ed by petsc or by user, we need to init PetscKokkosExecutionSpacePtr
82*c9903f8fSJunchao Zhang #if (defined(KOKKOS_ENABLE_CUDA) && defined(PETSC_HAVE_CUDA)) || (defined(KOKKOS_ENABLE_HIP) && defined(PETSC_HAVE_HIP))
83*c9903f8fSJunchao Zhang     PetscDeviceContext dctx;
84*c9903f8fSJunchao Zhang     PetscDeviceType    dtype;
85*c9903f8fSJunchao Zhang 
86*c9903f8fSJunchao Zhang     PetscDeviceContextGetCurrentContext(&dctx); // it internally sets PetscDefaultCuda/HipStream
87*c9903f8fSJunchao Zhang     PetscDeviceContextGetDeviceType(dctx, &dtype);
88*c9903f8fSJunchao Zhang 
89524fe776SJunchao Zhang   #if defined(PETSC_HAVE_CUDA)
90*c9903f8fSJunchao Zhang     if (dtype == PETSC_DEVICE_CUDA) PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace(PetscDefaultCudaStream));
91*c9903f8fSJunchao Zhang   #elif defined(PETSC_HAVE_HIP)
92*c9903f8fSJunchao Zhang     if (dtype == PETSC_DEVICE_HIP) PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace(PetscDefaultHipStream));
93*c9903f8fSJunchao Zhang   #endif
94524fe776SJunchao Zhang #else
95*c9903f8fSJunchao Zhang     // In all other cases, we use Kokkos default
96524fe776SJunchao Zhang     PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace());
97524fe776SJunchao Zhang #endif
9859e55d94SJunchao Zhang   }
99*c9903f8fSJunchao Zhang 
100e907feaaSJunchao Zhang   if (!PetscScalarPoolSize) { // A pool for a small count of PetscScalars
101e907feaaSJunchao Zhang     PetscScalarPoolSize = 1024;
102e907feaaSJunchao Zhang     PetscCallCXX(PetscScalarPool = static_cast<PetscScalar *>(Kokkos::kokkos_malloc(sizeof(PetscScalar) * PetscScalarPoolSize)));
103e907feaaSJunchao Zhang   }
104e907feaaSJunchao Zhang 
105e907feaaSJunchao Zhang   PetscKokkosInitialized = PETSC_TRUE; // PetscKokkosInitializeCheck() was called
1063ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
107375e5adfSJunchao Zhang }
108