xref: /honee/src/smartsim/smartsim.c (revision 4fa1625a4f8384d952910d1287237123ea0ab4e4)
// Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors.
// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
//
// SPDX-License-Identifier: BSD-2-Clause
//
// This file is part of CEED:  http://github.com/ceed
// Based on the instructions from https://www.craylabs.org/docs/sr_integration.html and the PHASTA implementation
87cd70835SJames Wright 
97cd70835SJames Wright #include "../../include/smartsim.h"
107cd70835SJames Wright 
117cd70835SJames Wright #include "../../navierstokes.h"
127cd70835SJames Wright 
137cd70835SJames Wright PetscErrorCode SmartRedisVerifyPutTensor(void *c_client, const char *name, const size_t name_length) {
147cd70835SJames Wright   bool does_exist = true;
157cd70835SJames Wright 
167cd70835SJames Wright   PetscFunctionBeginUser;
17*4fa1625aSJames Wright   PetscCall(PetscLogEventBegin(FLUIDS_SmartRedis_Meta, 0, 0, 0, 0));
18ff6b888aSJames Wright   PetscSmartRedisCall(tensor_exists(c_client, name, name_length, &does_exist));
197cd70835SJames Wright   PetscCheck(does_exist, PETSC_COMM_SELF, -1, "Tensor of name '%s' was not written to the database successfully", name);
20*4fa1625aSJames Wright   PetscCall(PetscLogEventEnd(FLUIDS_SmartRedis_Meta, 0, 0, 0, 0));
217cd70835SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
227cd70835SJames Wright }
237cd70835SJames Wright 
247cd70835SJames Wright PetscErrorCode SmartSimTrainingSetup(User user) {
257cd70835SJames Wright   SmartSimData smartsim = user->smartsim;
267cd70835SJames Wright   PetscMPIInt  rank;
277cd70835SJames Wright   PetscReal    checkrun[2] = {1};
287cd70835SJames Wright   size_t       dim_2[1]    = {2};
297cd70835SJames Wright 
307cd70835SJames Wright   PetscFunctionBeginUser;
317cd70835SJames Wright   PetscCallMPI(MPI_Comm_rank(user->comm, &rank));
327cd70835SJames Wright 
337cd70835SJames Wright   if (rank % smartsim->collocated_database_num_ranks == 0) {
347cd70835SJames Wright     // -- Send array that communicates when ML is done training
35ad2e713eSRiccardo Balin     PetscCall(PetscLogEventBegin(FLUIDS_SmartRedis_Meta, 0, 0, 0, 0));
36ff6b888aSJames Wright     PetscSmartRedisCall(put_tensor(smartsim->client, "check-run", 9, checkrun, dim_2, 1, SRTensorTypeDouble, SRMemLayoutContiguous));
377cd70835SJames Wright     PetscCall(SmartRedisVerifyPutTensor(smartsim->client, "check-run", 9));
38ad2e713eSRiccardo Balin     PetscCall(PetscLogEventEnd(FLUIDS_SmartRedis_Meta, 0, 0, 0, 0));
397cd70835SJames Wright   }
40aa0b7f76SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
417cd70835SJames Wright }
427cd70835SJames Wright 
437cd70835SJames Wright PetscErrorCode SmartSimSetup(User user) {
447cd70835SJames Wright   PetscMPIInt rank;
457cd70835SJames Wright   PetscInt    num_orchestrator_nodes = 1;
467cd70835SJames Wright 
477cd70835SJames Wright   PetscFunctionBeginUser;
487cd70835SJames Wright   PetscCall(PetscNew(&user->smartsim));
497cd70835SJames Wright   SmartSimData smartsim = user->smartsim;
507cd70835SJames Wright 
517cd70835SJames Wright   smartsim->collocated_database_num_ranks = 1;
527cd70835SJames Wright   PetscOptionsBegin(user->comm, NULL, "Options for SmartSim integration", NULL);
537cd70835SJames Wright   PetscCall(PetscOptionsInt("-smartsim_collocated_database_num_ranks", "Number of ranks per collocated database instance", NULL,
547cd70835SJames Wright                             smartsim->collocated_database_num_ranks, &smartsim->collocated_database_num_ranks, NULL));
557cd70835SJames Wright   PetscOptionsEnd();
567cd70835SJames Wright 
577cd70835SJames Wright   // Create prefix to be put on tensor names
587cd70835SJames Wright   PetscCallMPI(MPI_Comm_rank(user->comm, &rank));
59*4fa1625aSJames Wright   PetscCall(PetscSNPrintf(smartsim->rank_id_name, sizeof(smartsim->rank_id_name), "y.%d", rank));
607cd70835SJames Wright 
61ad2e713eSRiccardo Balin   PetscCall(PetscLogEventBegin(FLUIDS_SmartRedis_Init, 0, 0, 0, 0));
62*4fa1625aSJames Wright   PetscSmartRedisCall(SmartRedisCClient(num_orchestrator_nodes != 1, smartsim->rank_id_name, strlen(smartsim->rank_id_name), &smartsim->client));
63ad2e713eSRiccardo Balin   PetscCall(PetscLogEventEnd(FLUIDS_SmartRedis_Init, 0, 0, 0, 0));
647cd70835SJames Wright 
657cd70835SJames Wright   PetscCall(SmartSimTrainingSetup(user));
667cd70835SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
677cd70835SJames Wright }
68ec6e4151SJames Wright 
69ec6e4151SJames Wright PetscErrorCode SmartSimDataDestroy(SmartSimData smartsim) {
70ec6e4151SJames Wright   PetscFunctionBeginUser;
71ec6e4151SJames Wright   if (!smartsim) PetscFunctionReturn(PETSC_SUCCESS);
72ec6e4151SJames Wright 
73ff6b888aSJames Wright   PetscSmartRedisCall(DeleteCClient(&smartsim->client));
74ec6e4151SJames Wright   PetscCall(PetscFree(smartsim));
75ec6e4151SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
76ec6e4151SJames Wright }
77