1ae2b091fSJames Wright // SPDX-FileCopyrightText: Copyright (c) 2017-2024, HONEE contributors. 2ae2b091fSJames Wright // SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause 37cd70835SJames Wright // Based on the instructions from https://www.craylabs.org/docs/sr_integration.html and PHASTA implementation 47cd70835SJames Wright 5149fb536SJames Wright #include <smartsim.h> 67cd70835SJames Wright 7149fb536SJames Wright #include <navierstokes.h> 87cd70835SJames Wright 97cd70835SJames Wright PetscErrorCode SmartRedisVerifyPutTensor(void *c_client, const char *name, const size_t name_length) { 107cd70835SJames Wright bool does_exist = true; 117cd70835SJames Wright 127cd70835SJames Wright PetscFunctionBeginUser; 13*ea615d4cSJames Wright PetscCall(PetscLogEventBegin(HONEE_SmartRedis_Meta, 0, 0, 0, 0)); 1443e9749fSJames Wright PetscCallSmartRedis(tensor_exists(c_client, name, name_length, &does_exist)); 157cd70835SJames Wright PetscCheck(does_exist, PETSC_COMM_SELF, -1, "Tensor of name '%s' was not written to the database successfully", name); 16*ea615d4cSJames Wright PetscCall(PetscLogEventEnd(HONEE_SmartRedis_Meta, 0, 0, 0, 0)); 177cd70835SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 187cd70835SJames Wright } 197cd70835SJames Wright 200c373b74SJames Wright PetscErrorCode SmartSimTrainingSetup(Honee honee) { 210c373b74SJames Wright SmartSimData smartsim = honee->smartsim; 227cd70835SJames Wright PetscMPIInt rank; 237cd70835SJames Wright PetscReal checkrun[2] = {1}; 247cd70835SJames Wright size_t dim_2[1] = {2}; 257cd70835SJames Wright 267cd70835SJames Wright PetscFunctionBeginUser; 270c373b74SJames Wright PetscCallMPI(MPI_Comm_rank(honee->comm, &rank)); 287cd70835SJames Wright 297cd70835SJames Wright if (rank % smartsim->collocated_database_num_ranks == 0) { 307cd70835SJames Wright // -- Send array that communicates when ML is done training 31*ea615d4cSJames Wright PetscCall(PetscLogEventBegin(HONEE_SmartRedis_Meta, 0, 0, 0, 0)); 3243e9749fSJames Wright PetscCallSmartRedis(put_tensor(smartsim->client, "check-run", 9, checkrun, dim_2, 1, SRTensorTypeDouble, SRMemLayoutContiguous)); 337cd70835SJames Wright PetscCall(SmartRedisVerifyPutTensor(smartsim->client, "check-run", 9)); 34*ea615d4cSJames Wright PetscCall(PetscLogEventEnd(HONEE_SmartRedis_Meta, 0, 0, 0, 0)); 357cd70835SJames Wright } 36aa0b7f76SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 377cd70835SJames Wright } 387cd70835SJames Wright 390c373b74SJames Wright PetscErrorCode SmartSimSetup(Honee honee) { 407cd70835SJames Wright PetscMPIInt rank; 417cd70835SJames Wright PetscInt num_orchestrator_nodes = 1; 427cd70835SJames Wright 437cd70835SJames Wright PetscFunctionBeginUser; 440c373b74SJames Wright PetscCall(PetscNew(&honee->smartsim)); 450c373b74SJames Wright SmartSimData smartsim = honee->smartsim; 467cd70835SJames Wright 477cd70835SJames Wright smartsim->collocated_database_num_ranks = 1; 480c373b74SJames Wright PetscOptionsBegin(honee->comm, NULL, "Options for SmartSim integration", NULL); 497cd70835SJames Wright PetscCall(PetscOptionsInt("-smartsim_collocated_database_num_ranks", "Number of ranks per collocated database instance", NULL, 507cd70835SJames Wright smartsim->collocated_database_num_ranks, &smartsim->collocated_database_num_ranks, NULL)); 517cd70835SJames Wright PetscOptionsEnd(); 527cd70835SJames Wright 537cd70835SJames Wright // Create prefix to be put on tensor names 540c373b74SJames Wright PetscCallMPI(MPI_Comm_rank(honee->comm, &rank)); 554fa1625aSJames Wright PetscCall(PetscSNPrintf(smartsim->rank_id_name, sizeof(smartsim->rank_id_name), "y.%d", rank)); 567cd70835SJames Wright 57*ea615d4cSJames Wright PetscCall(PetscLogEventBegin(HONEE_SmartRedis_Init, 0, 0, 0, 0)); 5843e9749fSJames Wright PetscCallSmartRedis(SmartRedisCClient(num_orchestrator_nodes != 1, smartsim->rank_id_name, strlen(smartsim->rank_id_name), &smartsim->client)); 59*ea615d4cSJames Wright PetscCall(PetscLogEventEnd(HONEE_SmartRedis_Init, 0, 0, 0, 0)); 607cd70835SJames Wright 610c373b74SJames Wright PetscCall(SmartSimTrainingSetup(honee)); 627cd70835SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 637cd70835SJames Wright } 64ec6e4151SJames Wright 65ec6e4151SJames Wright PetscErrorCode SmartSimDataDestroy(SmartSimData smartsim) { 66ec6e4151SJames Wright PetscFunctionBeginUser; 67ec6e4151SJames Wright if (!smartsim) PetscFunctionReturn(PETSC_SUCCESS); 68ec6e4151SJames Wright 6943e9749fSJames Wright PetscCallSmartRedis(DeleteCClient(&smartsim->client)); 70ec6e4151SJames Wright PetscCall(PetscFree(smartsim)); 71ec6e4151SJames Wright PetscFunctionReturn(PETSC_SUCCESS); 72ec6e4151SJames Wright } 73