xref: /honee/src/smartsim/smartsim.c (revision ae2b091fac884a554e48acc4b4c187524c2a2818)
1*ae2b091fSJames Wright // SPDX-FileCopyrightText: Copyright (c) 2017-2024, HONEE contributors.
2*ae2b091fSJames Wright // SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause
37cd70835SJames Wright // Based on the instructions from https://www.craylabs.org/docs/sr_integration.html and PHASTA implementation
47cd70835SJames Wright 
5149fb536SJames Wright #include <smartsim.h>
67cd70835SJames Wright 
7149fb536SJames Wright #include <navierstokes.h>
87cd70835SJames Wright 
97cd70835SJames Wright PetscErrorCode SmartRedisVerifyPutTensor(void *c_client, const char *name, const size_t name_length) {
107cd70835SJames Wright   bool does_exist = true;
117cd70835SJames Wright 
127cd70835SJames Wright   PetscFunctionBeginUser;
134fa1625aSJames Wright   PetscCall(PetscLogEventBegin(FLUIDS_SmartRedis_Meta, 0, 0, 0, 0));
1443e9749fSJames Wright   PetscCallSmartRedis(tensor_exists(c_client, name, name_length, &does_exist));
157cd70835SJames Wright   PetscCheck(does_exist, PETSC_COMM_SELF, -1, "Tensor of name '%s' was not written to the database successfully", name);
164fa1625aSJames Wright   PetscCall(PetscLogEventEnd(FLUIDS_SmartRedis_Meta, 0, 0, 0, 0));
177cd70835SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
187cd70835SJames Wright }
197cd70835SJames Wright 
207cd70835SJames Wright PetscErrorCode SmartSimTrainingSetup(User user) {
217cd70835SJames Wright   SmartSimData smartsim = user->smartsim;
227cd70835SJames Wright   PetscMPIInt  rank;
237cd70835SJames Wright   PetscReal    checkrun[2] = {1};
247cd70835SJames Wright   size_t       dim_2[1]    = {2};
257cd70835SJames Wright 
267cd70835SJames Wright   PetscFunctionBeginUser;
277cd70835SJames Wright   PetscCallMPI(MPI_Comm_rank(user->comm, &rank));
287cd70835SJames Wright 
297cd70835SJames Wright   if (rank % smartsim->collocated_database_num_ranks == 0) {
307cd70835SJames Wright     // -- Send array that communicates when ML is done training
31ad2e713eSRiccardo Balin     PetscCall(PetscLogEventBegin(FLUIDS_SmartRedis_Meta, 0, 0, 0, 0));
3243e9749fSJames Wright     PetscCallSmartRedis(put_tensor(smartsim->client, "check-run", 9, checkrun, dim_2, 1, SRTensorTypeDouble, SRMemLayoutContiguous));
337cd70835SJames Wright     PetscCall(SmartRedisVerifyPutTensor(smartsim->client, "check-run", 9));
34ad2e713eSRiccardo Balin     PetscCall(PetscLogEventEnd(FLUIDS_SmartRedis_Meta, 0, 0, 0, 0));
357cd70835SJames Wright   }
36aa0b7f76SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
377cd70835SJames Wright }
387cd70835SJames Wright 
397cd70835SJames Wright PetscErrorCode SmartSimSetup(User user) {
407cd70835SJames Wright   PetscMPIInt rank;
417cd70835SJames Wright   PetscInt    num_orchestrator_nodes = 1;
427cd70835SJames Wright 
437cd70835SJames Wright   PetscFunctionBeginUser;
447cd70835SJames Wright   PetscCall(PetscNew(&user->smartsim));
457cd70835SJames Wright   SmartSimData smartsim = user->smartsim;
467cd70835SJames Wright 
477cd70835SJames Wright   smartsim->collocated_database_num_ranks = 1;
487cd70835SJames Wright   PetscOptionsBegin(user->comm, NULL, "Options for SmartSim integration", NULL);
497cd70835SJames Wright   PetscCall(PetscOptionsInt("-smartsim_collocated_database_num_ranks", "Number of ranks per collocated database instance", NULL,
507cd70835SJames Wright                             smartsim->collocated_database_num_ranks, &smartsim->collocated_database_num_ranks, NULL));
517cd70835SJames Wright   PetscOptionsEnd();
527cd70835SJames Wright 
537cd70835SJames Wright   // Create prefix to be put on tensor names
547cd70835SJames Wright   PetscCallMPI(MPI_Comm_rank(user->comm, &rank));
554fa1625aSJames Wright   PetscCall(PetscSNPrintf(smartsim->rank_id_name, sizeof(smartsim->rank_id_name), "y.%d", rank));
567cd70835SJames Wright 
57ad2e713eSRiccardo Balin   PetscCall(PetscLogEventBegin(FLUIDS_SmartRedis_Init, 0, 0, 0, 0));
5843e9749fSJames Wright   PetscCallSmartRedis(SmartRedisCClient(num_orchestrator_nodes != 1, smartsim->rank_id_name, strlen(smartsim->rank_id_name), &smartsim->client));
59ad2e713eSRiccardo Balin   PetscCall(PetscLogEventEnd(FLUIDS_SmartRedis_Init, 0, 0, 0, 0));
607cd70835SJames Wright 
617cd70835SJames Wright   PetscCall(SmartSimTrainingSetup(user));
627cd70835SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
637cd70835SJames Wright }
64ec6e4151SJames Wright 
65ec6e4151SJames Wright PetscErrorCode SmartSimDataDestroy(SmartSimData smartsim) {
66ec6e4151SJames Wright   PetscFunctionBeginUser;
67ec6e4151SJames Wright   if (!smartsim) PetscFunctionReturn(PETSC_SUCCESS);
68ec6e4151SJames Wright 
6943e9749fSJames Wright   PetscCallSmartRedis(DeleteCClient(&smartsim->client));
70ec6e4151SJames Wright   PetscCall(PetscFree(smartsim));
71ec6e4151SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
72ec6e4151SJames Wright }
73