1 // SPDX-FileCopyrightText: Copyright (c) 2017-2024, HONEE contributors. 2 // SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause 3 // Based on the instructions from https://www.craylabs.org/docs/sr_integration.html and PHASTA implementation 4 5 #include <smartsim.h> 6 7 #include <navierstokes.h> 8 9 PetscErrorCode SmartSimDataDestroy(SmartSimData smartsim) { 10 PetscFunctionBeginUser; 11 if (!smartsim) PetscFunctionReturn(PETSC_SUCCESS); 12 13 PetscCallSmartRedis(DeleteCClient(&smartsim->client)); 14 PetscCall(PetscFree(smartsim)); 15 PetscFunctionReturn(PETSC_SUCCESS); 16 } 17 18 static PetscErrorCode SmartSimTrainingSetup(Honee honee) { 19 SmartSimData smartsim = honee->smartsim; 20 PetscMPIInt rank; 21 PetscReal checkrun[2] = {1}; 22 size_t dim_2[1] = {2}; 23 24 PetscFunctionBeginUser; 25 PetscCallMPI(MPI_Comm_rank(honee->comm, &rank)); 26 27 if (rank % smartsim->collocated_database_num_ranks == 0) { 28 // -- Send array that communicates when ML is done training 29 PetscCall(PetscLogEventBegin(HONEE_SmartRedis_Meta, 0, 0, 0, 0)); 30 PetscCallSmartRedis(put_tensor(smartsim->client, "check-run", 9, checkrun, dim_2, 1, SRTensorTypeDouble, SRMemLayoutContiguous)); 31 PetscCall(SmartRedisVerifyPutTensor(smartsim->client, "check-run", 9)); 32 PetscCall(PetscLogEventEnd(HONEE_SmartRedis_Meta, 0, 0, 0, 0)); 33 } 34 PetscFunctionReturn(PETSC_SUCCESS); 35 } 36 37 PetscErrorCode SmartSimSetup(Honee honee) { 38 PetscMPIInt rank; 39 PetscInt num_orchestrator_nodes = 1; 40 41 PetscFunctionBeginUser; 42 PetscCall(PetscNew(&honee->smartsim)); 43 SmartSimData smartsim = honee->smartsim; 44 45 smartsim->collocated_database_num_ranks = 1; 46 PetscOptionsBegin(honee->comm, NULL, "Options for SmartSim integration", NULL); 47 PetscCall(PetscOptionsInt("-smartsim_collocated_database_num_ranks", "Number of ranks per collocated database instance", NULL, 48 smartsim->collocated_database_num_ranks, &smartsim->collocated_database_num_ranks, NULL)); 49 PetscOptionsEnd(); 50 51 // Create prefix to be put on tensor names 52 PetscCallMPI(MPI_Comm_rank(honee->comm, &rank)); 53 PetscCall(PetscSNPrintf(smartsim->rank_id_name, sizeof(smartsim->rank_id_name), "y.%d", rank)); 54 55 PetscCall(PetscLogEventBegin(HONEE_SmartRedis_Init, 0, 0, 0, 0)); 56 PetscCallSmartRedis(SmartRedisCClient(num_orchestrator_nodes != 1, smartsim->rank_id_name, strlen(smartsim->rank_id_name), &smartsim->client)); 57 PetscCall(PetscLogEventEnd(HONEE_SmartRedis_Init, 0, 0, 0, 0)); 58 59 PetscCall(SmartSimTrainingSetup(honee)); 60 PetscFunctionReturn(PETSC_SUCCESS); 61 } 62 63 PetscErrorCode SmartRedisVerifyPutTensor(void *c_client, const char *name, const size_t name_length) { 64 bool does_exist = true; 65 66 PetscFunctionBeginUser; 67 PetscCall(PetscLogEventBegin(HONEE_SmartRedis_Meta, 0, 0, 0, 0)); 68 PetscCallSmartRedis(tensor_exists(c_client, name, name_length, &does_exist)); 69 PetscCheck(does_exist, PETSC_COMM_SELF, -1, "Tensor of name '%s' was not written to the database successfully", name); 70 PetscCall(PetscLogEventEnd(HONEE_SmartRedis_Meta, 0, 0, 0, 0)); 71 PetscFunctionReturn(PETSC_SUCCESS); 72 } 73