// SPDX-FileCopyrightText: Copyright (c) 2017-2024, HONEE contributors.
// SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause

/// @file
/// Implementation of the Synthetic Turbulence Generation (STG) algorithm
/// presented in Shur et al. 2014

#include "stg_shur14.h"

#include <ceed.h>
#include <math.h>
#include <petscdm.h>
#include <stdlib.h>

#include <navierstokes.h>
#include "../qfunctions/stg_shur14.h"

// File-scope STG context.
// SetupStg() allocates it, GetStgContextData() reallocates it with room for the trailing `data` array, and SetupStg() then hands the pointer to a
// CeedQFunctionContext (CEED_USE_POINTER) with FreeContextPetsc registered as the data destroy routine.
StgShur14Context global_stg_ctx;

/*
 * @brief Compute the lower-triangular Cholesky factor of every symmetric 3x3 matrix in an array
 *
 * The six unique entries of each matrix are stored in the order [11,22,33,12,13,23], for both the input and the output.
 * An error is raised if any diagonal entry of a factor evaluates to nan.
 *
 * @param[in]  comm   MPI_Comm used for error reporting
 * @param[in]  nprofs Number of matrices in Rij
 * @param[in]  Rij    Array of the symmetric matrices [6,nprofs]
 * @param[out] Cij    Array of the Cholesky Decomposition matrices, [6,nprofs]
 */
PetscErrorCode CalcCholeskyDecomp(MPI_Comm comm, PetscInt nprofs, const CeedScalar Rij[6][nprofs], CeedScalar Cij[6][nprofs]) {
  PetscFunctionBeginUser;
  for (PetscInt p = 0; p < nprofs; p++) {
    // First column of the factor
    const CeedScalar c11 = sqrt(Rij[0][p]);
    const CeedScalar c21 = Rij[3][p] / c11;
    const CeedScalar c31 = Rij[4][p] / c11;
    // Second column of the factor
    const CeedScalar c22 = sqrt(Rij[1][p] - Square(c21));
    const CeedScalar c32 = (Rij[5][p] - c21 * c31) / c22;
    // Third column of the factor
    const CeedScalar c33 = sqrt(Rij[2][p] - Square(c31) - Square(c32));

    Cij[0][p] = c11;
    Cij[1][p] = c22;
    Cij[2][p] = c33;
    Cij[3][p] = c21;
    Cij[4][p] = c31;
    Cij[5][p] = c32;

    // A nan on the diagonal signals a failed square root or a propagated nan from the input
    const int diag_has_nan = isnan(c11) || isnan(c22) || isnan(c33);
    PetscCheck(!diag_has_nan, comm, PETSC_ERR_FP,
               "Cholesky decomposition failed at profile point %" PetscInt_FMT ". Either STGInflow has non-SPD matrix or contains nan.", p + 1);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * @brief Read the STGInflow file and load the contents into stg_ctx
 *
 * Assumes that the first line of the file has the number of rows and columns as the only two entries, separated by a single space.
 * Every data row must contain exactly the number of columns declared in that header, and the header must declare at least 14 columns.
 * Columns are consumed as follows:
 *   0     -> wall distance
 *   1-3   -> `ubar` components
 *   4-9   -> Reynolds stress entries, in the order [11,22,33,12,13,23]
 *   12    -> turbulent length scale
 *   13    -> turbulent dissipation
 * Columns 10 and 11 are not read.
 *
 * Function calculates the Cholesky decomposition from the Reynolds stress profile found in the file.
 *
 * @param[in]     comm    MPI_Comm for the program
 * @param[in]     path    Path to the STGInflow.dat file
 * @param[in,out] stg_ctx STGShur14Context where the data will be loaded into; its `nprofs` and `offsets` must already be set
 */
static PetscErrorCode ReadStgInflow(const MPI_Comm comm, const char path[PETSC_MAX_PATH_LEN], StgShur14Context stg_ctx) {
  PetscInt       dims[2];
  int            ndims;
  FILE          *fp;
  const PetscInt char_array_len = 512;
  const PetscInt min_columns    = 14;  // Highest column index read below is 13
  char           line[char_array_len];
  char         **array;

  PetscFunctionBeginUser;
  PetscCall(PhastaDatFileOpen(comm, path, char_array_len, dims, &fp));
  // The per-row check below only enforces ndims == dims[1]; without this guard a narrower file would be indexed out of bounds
  PetscCheck(dims[1] >= min_columns, comm, PETSC_ERR_FILE_UNEXPECTED,
             "%s declares %" PetscInt_FMT " columns, but at least %" PetscInt_FMT " are required", path, dims[1], min_columns);

  const PetscInt nprofs = stg_ctx->nprofs;
  CeedScalar     rij[6][nprofs];
  CeedScalar    *wall_dist   = &stg_ctx->data[stg_ctx->offsets.wall_dist];
  CeedScalar    *eps         = &stg_ctx->data[stg_ctx->offsets.eps];
  CeedScalar    *lt          = &stg_ctx->data[stg_ctx->offsets.lt];
  CeedScalar(*ubar)[nprofs]  = (CeedScalar(*)[nprofs])&stg_ctx->data[stg_ctx->offsets.ubar];

  for (PetscInt i = 0; i < nprofs; i++) {
    PetscCall(PetscSynchronizedFGets(comm, fp, char_array_len, line));
    PetscCall(PetscStrToArray(line, ' ', &ndims, &array));
    // Data row i sits on file line i + 2, assuming the header occupies exactly the first line of the file
    PetscCheck(ndims == dims[1], comm, PETSC_ERR_FILE_UNEXPECTED,
               "Line %" PetscInt_FMT " of %s does not have correct number of columns (%d instead of %" PetscInt_FMT ")", i + 2, path, ndims, dims[1]);

    wall_dist[i] = (CeedScalar)atof(array[0]);
    ubar[0][i]   = (CeedScalar)atof(array[1]);
    ubar[1][i]   = (CeedScalar)atof(array[2]);
    ubar[2][i]   = (CeedScalar)atof(array[3]);
    rij[0][i]    = (CeedScalar)atof(array[4]);
    rij[1][i]    = (CeedScalar)atof(array[5]);
    rij[2][i]    = (CeedScalar)atof(array[6]);
    rij[3][i]    = (CeedScalar)atof(array[7]);
    rij[4][i]    = (CeedScalar)atof(array[8]);
    rij[5][i]    = (CeedScalar)atof(array[9]);
    lt[i]        = (CeedScalar)atof(array[12]);
    eps[i]       = (CeedScalar)atof(array[13]);
    // Release the tokens before validating the values so a failed check does not leak them
    PetscCall(PetscStrToArrayDestroy(ndims, array));

    PetscCheck(wall_dist[i] >= 0, comm, PETSC_ERR_FILE_UNEXPECTED, "Distance to wall in %s cannot be negative", path);
    PetscCheck(lt[i] >= 0, comm, PETSC_ERR_FILE_UNEXPECTED, "Turbulent length scale in %s cannot be negative", path);
    PetscCheck(eps[i] >= 0, comm, PETSC_ERR_FILE_UNEXPECTED, "Turbulent dissipation in %s cannot be negative", path);
  }
  CeedScalar(*cij)[nprofs] = (CeedScalar(*)[nprofs])&stg_ctx->data[stg_ctx->offsets.cij];
  PetscCall(CalcCholeskyDecomp(comm, nprofs, rij, cij));
  PetscCall(PetscFClose(comm, fp));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * @brief Read the STGRand file and load the contents into stg_ctx
 *
 * Assumes that the first line of the file has the number of rows and columns as the only two entries, separated by a single space.
 * Every data row must contain exactly the number of columns declared in that header, and the header must declare at least 7 columns.
 * Columns are consumed as follows:
 *   0-2 -> `d` components
 *   3   -> `phi`
 *   4-6 -> `sigma` components
 *
 * @param[in]     comm    MPI_Comm for the program
 * @param[in]     path    Path to the STGRand.dat file
 * @param[in,out] stg_ctx STGShur14Context where the data will be loaded into; its `nmodes` and `offsets` must already be set
 */
static PetscErrorCode ReadStgRand(const MPI_Comm comm, const char path[PETSC_MAX_PATH_LEN], StgShur14Context stg_ctx) {
  PetscInt       dims[2];
  int            ndims;
  FILE          *fp;
  const PetscInt char_array_len = 512;
  const PetscInt min_columns    = 7;  // Highest column index read below is 6
  char           line[char_array_len];
  char         **array;

  PetscFunctionBeginUser;
  PetscCall(PhastaDatFileOpen(comm, path, char_array_len, dims, &fp));
  // The per-row check below only enforces ndims == dims[1]; without this guard a narrower file would be indexed out of bounds
  PetscCheck(dims[1] >= min_columns, comm, PETSC_ERR_FILE_UNEXPECTED,
             "%s declares %" PetscInt_FMT " columns, but at least %" PetscInt_FMT " are required", path, dims[1], min_columns);

  const PetscInt nmodes      = stg_ctx->nmodes;
  CeedScalar    *phi         = &stg_ctx->data[stg_ctx->offsets.phi];
  CeedScalar(*d)[nmodes]     = (CeedScalar(*)[nmodes])&stg_ctx->data[stg_ctx->offsets.d];
  CeedScalar(*sigma)[nmodes] = (CeedScalar(*)[nmodes])&stg_ctx->data[stg_ctx->offsets.sigma];

  for (PetscInt i = 0; i < nmodes; i++) {
    PetscCall(PetscSynchronizedFGets(comm, fp, char_array_len, line));
    PetscCall(PetscStrToArray(line, ' ', &ndims, &array));
    // Data row i sits on file line i + 2, assuming the header occupies exactly the first line of the file
    PetscCheck(ndims == dims[1], comm, PETSC_ERR_FILE_UNEXPECTED,
               "Line %" PetscInt_FMT " of %s does not have correct number of columns (%d instead of %" PetscInt_FMT ")", i + 2, path, ndims, dims[1]);

    d[0][i]     = (CeedScalar)atof(array[0]);
    d[1][i]     = (CeedScalar)atof(array[1]);
    d[2][i]     = (CeedScalar)atof(array[2]);
    phi[i]      = (CeedScalar)atof(array[3]);
    sigma[0][i] = (CeedScalar)atof(array[4]);
    sigma[1][i] = (CeedScalar)atof(array[5]);
    sigma[2][i] = (CeedScalar)atof(array[6]);
    PetscCall(PetscStrToArrayDestroy(ndims, array));
  }
  PetscCall(PetscFClose(comm, fp));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * @brief Read STG data from input paths and put in STGShur14Context
 *
 * Reads data from input paths and puts them into a STGShur14Context object.
 * Data stored initially in `*stg_ctx` will be copied over to the new STGShur14Context instance.
 * The old `*stg_ctx` allocation is freed and replaced by one large enough to hold the struct plus its trailing `data` array.
 *
 * The STGRand file is only read when fluctuations are needed (`mean_only` is false or `use_fluctuating_IC` is true).
 * In that case the wavenumbers `kappa` are also computed, as a geometric sequence starting at pi / max_i(min(2 * wall_dist[i], 3 * lt[i])) with
 * ratio `alpha`.
 *
 * @param[in]     comm            MPI_Comm for the program
 * @param[in]     dm              DM for the program (not used by this function)
 * @param[in]     stg_inflow_path Path to STGInflow.dat file
 * @param[in]     stg_rand_path   Path to STGRand.dat file
 * @param[in,out] stg_ctx         Pointer to STGShur14Context where the data will be loaded into
 */
PetscErrorCode GetStgContextData(const MPI_Comm comm, const DM dm, char stg_inflow_path[PETSC_MAX_PATH_LEN], char stg_rand_path[PETSC_MAX_PATH_LEN],
                                 StgShur14Context *stg_ctx) {
  PetscInt nmodes = 0, nprofs;

  PetscFunctionBeginUser;
  PetscCall(PhastaDatFileGetNRows(comm, stg_inflow_path, &nprofs));
  const PetscBool need_rand = (!(*stg_ctx)->mean_only) || (*stg_ctx)->use_fluctuating_IC;
  if (need_rand) {
    PetscCall(PhastaDatFileGetNRows(comm, stg_rand_path, &nmodes));
    PetscCheck(nmodes < STG_NMODES_MAX, comm, PETSC_ERR_SUP,
               "Number of wavemodes in %s (%" PetscInt_FMT ") exceeds STG_NMODES_MAX (%d). Change size of STG_NMODES_MAX and recompile",
               stg_rand_path, nmodes, STG_NMODES_MAX);
  }

  {
    // Stage the header fields in a temporary so they survive freeing and re-allocating *stg_ctx at its final size
    StgShur14Context temp_ctx;
    PetscCall(PetscNew(&temp_ctx));
    *temp_ctx        = **stg_ctx;
    temp_ctx->nmodes = nmodes;
    temp_ctx->nprofs = nprofs;
    // nmode = 0 if random numbers are not read from file, therefore offsets will be correctly handled
    temp_ctx->offsets.sigma     = 0;
    temp_ctx->offsets.d         = nmodes * 3;
    temp_ctx->offsets.phi       = temp_ctx->offsets.d + nmodes * 3;
    temp_ctx->offsets.kappa     = temp_ctx->offsets.phi + nmodes;
    temp_ctx->offsets.wall_dist = temp_ctx->offsets.kappa + nmodes;
    temp_ctx->offsets.ubar      = temp_ctx->offsets.wall_dist + nprofs;
    temp_ctx->offsets.cij       = temp_ctx->offsets.ubar + nprofs * 3;
    temp_ctx->offsets.eps       = temp_ctx->offsets.cij + nprofs * 6;
    temp_ctx->offsets.lt        = temp_ctx->offsets.eps + nprofs;
    PetscInt total_num_scalars  = temp_ctx->offsets.lt + nprofs;
    temp_ctx->total_bytes       = sizeof(*temp_ctx) + total_num_scalars * sizeof(temp_ctx->data[0]);
    PetscCall(PetscFree(*stg_ctx));
    PetscCall(PetscMalloc(temp_ctx->total_bytes, stg_ctx));
    **stg_ctx = *temp_ctx;
    PetscCall(PetscFree(temp_ctx));
  }

  PetscCall(ReadStgInflow(comm, stg_inflow_path, *stg_ctx));
  if (need_rand) {
    PetscCall(ReadStgRand(comm, stg_rand_path, *stg_ctx));
    CeedScalar *kappa     = &(*stg_ctx)->data[(*stg_ctx)->offsets.kappa];
    CeedScalar *wall_dist = &(*stg_ctx)->data[(*stg_ctx)->offsets.wall_dist];
    CeedScalar *lt        = &(*stg_ctx)->data[(*stg_ctx)->offsets.lt];
    CeedScalar  le_max    = 0;

    // Deliberately a plain serial loop: le_max carries a dependence from one iteration to the next (max-reduction),
    // so the loop must not be annotated as free of loop-carried dependences
    for (PetscInt i = 0; i < (*stg_ctx)->nprofs; i++) {
      const CeedScalar le = PetscMin(2 * wall_dist[i], 3 * lt[i]);
      if (le_max < le) le_max = le;
    }
    // le_max is the divisor below; it stays 0 when every profile point has zero wall distance or zero length scale
    PetscCheck(le_max > 0, comm, PETSC_ERR_SUP,
               "Cannot compute STG wavenumbers: no profile point in %s has both a positive wall distance and a positive turbulent length scale",
               stg_inflow_path);
    const CeedScalar kmin = M_PI / le_max;

    CeedPragmaSIMD for (PetscInt i = 0; i < (*stg_ctx)->nmodes; i++) kappa[i] = kmin * pow((*stg_ctx)->alpha, i);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

// @brief Create the IFunction QFunction (StgShur14Inflow) for a weak STG inflow boundary, using the QFunction context stored on the BC's HoneeBCStruct
static PetscErrorCode STGWeakInflowBCSetup_CreateIFunctionQF(BCDefinition bc_def, CeedQFunction *qf) {
  HoneeBCStruct bc_ctx = NULL;

  PetscFunctionBeginUser;
  // The context was attached to the boundary definition by SetupStg()
  PetscCall(BCDefinitionGetContext(bc_def, &bc_ctx));
  PetscCall(HoneeBCCreateIFunctionQF(bc_def, StgShur14Inflow, StgShur14Inflow_loc, bc_ctx->qfctx, qf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// @brief Create the IJacobian QFunction (StgShur14Inflow_Jacobian) for a weak STG inflow boundary, using the QFunction context stored on the BC's
//        HoneeBCStruct
static PetscErrorCode STGWeakInflowBCSetup_CreateIJacobianQF(BCDefinition bc_def, CeedQFunction *qf) {
  HoneeBCStruct bc_ctx = NULL;

  PetscFunctionBeginUser;
  // The context was attached to the boundary definition by SetupStg()
  PetscCall(BCDefinitionGetContext(bc_def, &bc_ctx));
  PetscCall(HoneeBCCreateIJacobianQF(bc_def, StgShur14Inflow_Jacobian, StgShur14Inflow_Jacobian_loc, bc_ctx->qfctx, qf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * @brief Set up the STG inflow: parse options, build the STG context, and register it with the problem
 *
 * Steps performed:
 *  - Parse the `-stg_*` command line options.
 *  - Allocate the global STGShur14Context and fill it from the options, the arguments, and a copy of the Newtonian context held by
 *    `problem->apply_vol_rhs.qfctx`.
 *  - For weak enforcement, set the inflow element length `dx` from `-stg_dx`, or from the domain extent in x divided by the first entry of
 *    `-dm_plex_box_faces`.
 *  - Load the STGInflow/STGRand data via GetStgContextData().
 *  - Wrap the context in a CeedQFunctionContext, register the "solution time" field, and install it (with ICsStg) as `problem->ics`.
 *  - For strong enforcement (`-stg_strong`) only set the flags on `problem`; otherwise attach the STG IFunction/IJacobian QFunction creators to every
 *    boundary definition named "inflow".
 *
 * @param[in]     comm        MPI_Comm for the program
 * @param[in]     dm          DM for the program; its bounding box is used to infer `dx`
 * @param[in,out] problem     ProblemData whose `ics`, boundary definitions, and BC flags are updated
 * @param[in]     honee       Honee object providing the Ceed, application context, and physics settings
 * @param[in]     prescribe_T Stored in the STG context as `prescribe_T`
 * @param[in]     theta0      Stored in the STG context as `theta0`
 * @param[in]     P0          Stored in the STG context as `P0`
 */
PetscErrorCode SetupStg(const MPI_Comm comm, const DM dm, ProblemData problem, Honee honee, const bool prescribe_T, const CeedScalar theta0,
                        const CeedScalar P0) {
  Ceed                     ceed                                = honee->ceed;
  char                     stg_inflow_path[PETSC_MAX_PATH_LEN] = "./STGInflow.dat";
  char                     stg_rand_path[PETSC_MAX_PATH_LEN]   = "./STGRand.dat";
  PetscBool                mean_only = PETSC_FALSE, use_stgstrong = PETSC_FALSE, use_fluctuating_IC = PETSC_FALSE, given_stg_dx = PETSC_FALSE;
  // Use a floating-point numerator: with an integer `degree`, `1 / degree` would truncate to 0 for every degree above 1
  CeedScalar               u0 = 0.0, alpha = 1.01, stg_dx = -1, stg_h_scale_factor = 1.0 / honee->app_ctx->degree;
  CeedQFunctionContext     stg_qfctx;
  NewtonianIdealGasContext newtonian_ig_ctx;

  PetscFunctionBeginUser;
  PetscOptionsBegin(comm, NULL, "STG Boundary Condition Options", NULL);
  PetscCall(PetscOptionsString("-stg_inflow_path", "Path to STGInflow.dat", NULL, stg_inflow_path, stg_inflow_path, sizeof(stg_inflow_path), NULL));
  PetscCall(PetscOptionsString("-stg_rand_path", "Path to STGRand.dat", NULL, stg_rand_path, stg_rand_path, sizeof(stg_rand_path), NULL));
  PetscCall(PetscOptionsReal("-stg_alpha", "Growth rate of the wavemodes", NULL, alpha, &alpha, NULL));
  PetscCall(PetscOptionsReal("-stg_u0", "Advective velocity for the fluctuations", NULL, u0, &u0, NULL));
  PetscCall(PetscOptionsBool("-stg_mean_only", "Only apply mean profile", NULL, mean_only, &mean_only, NULL));
  PetscCall(PetscOptionsBool("-stg_strong", "Enforce STG inflow strongly", NULL, use_stgstrong, &use_stgstrong, NULL));
  PetscCall(PetscOptionsBool("-stg_fluctuating_IC", "\"Extrude\" the fluctuations through the domain as an initial condition", NULL,
                             use_fluctuating_IC, &use_fluctuating_IC, NULL));
  PetscCall(PetscOptionsReal("-stg_dx", "Element length in x direction at inflow", NULL, stg_dx, &stg_dx, &given_stg_dx));
  if (given_stg_dx && use_stgstrong) PetscCall(PetscPrintf(comm, "WARNING: -stg_dx is ignored for -stg_strong\n"));
  PetscCall(PetscOptionsReal("-stg_h_scale_factor", "Scale element size for cutoff frequency calculation", NULL, stg_h_scale_factor,
                             &stg_h_scale_factor, NULL));
  PetscCall(PetscOptionsDeprecated("-stg_dyScale", NULL, "libCEED 0.12.0", "Use -stg_h_scale_factor to scale all the element dimensions"));
  PetscCall(PetscOptionsDeprecated("-stg_dz", NULL, "libCEED 0.12.0", NULL));
  PetscOptionsEnd();

  PetscCall(PetscNew(&global_stg_ctx));
  *global_stg_ctx = (struct STGShur14Context_){
      .alpha              = alpha,
      .u0                 = u0,
      .is_implicit        = honee->phys->implicit,
      .prescribe_T        = prescribe_T,
      .mean_only          = mean_only,
      .use_fluctuating_IC = use_fluctuating_IC,
      .theta0             = theta0,
      .P0                 = P0,
      .h_scale_factor     = stg_h_scale_factor,
  };

  if (!use_stgstrong) {  // Calculate dx assuming constant spacing
    PetscReal domain_min[3], domain_max[3], domain_size[3];
    PetscCall(DMGetBoundingBox(dm, domain_min, domain_max));
    for (PetscInt i = 0; i < 3; i++) domain_size[i] = domain_max[i] - domain_min[i];

    // Zero-initialize and track whether the option was found, so an absent -dm_plex_box_faces is never read as garbage
    PetscInt  nmax = 3, faces[3] = {0, 0, 0};
    PetscBool faces_set = PETSC_FALSE;
    PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-dm_plex_box_faces", faces, &nmax, &faces_set));
    if (given_stg_dx) {
      global_stg_ctx->dx = stg_dx;
    } else {
      PetscCheck(faces_set && nmax > 0 && faces[0] > 0, comm, PETSC_ERR_LIB,
                 "Cannot infer STG dx: provide -stg_dx or a positive first entry in -dm_plex_box_faces");
      global_stg_ctx->dx = domain_size[0] / faces[0];
    }
    PetscCheck((global_stg_ctx->dx > 0) && PetscIsNormalReal((PetscReal)global_stg_ctx->dx), comm, PETSC_ERR_LIB,
               "STG dx must be positive normal number, got %g", global_stg_ctx->dx);
  }

  // Copy the Newtonian context by value into the STG context
  PetscCallCeed(ceed, CeedQFunctionContextGetData(problem->apply_vol_rhs.qfctx, CEED_MEM_HOST, &newtonian_ig_ctx));
  global_stg_ctx->newt_ctx = *newtonian_ig_ctx;
  PetscCallCeed(ceed, CeedQFunctionContextRestoreData(problem->apply_vol_rhs.qfctx, &newtonian_ig_ctx));

  // Reallocates global_stg_ctx with room for the profile and wavemode data
  PetscCall(GetStgContextData(comm, dm, stg_inflow_path, stg_rand_path, &global_stg_ctx));

  // The QFunction context borrows the pointer and releases it through FreeContextPetsc when destroyed
  PetscCallCeed(ceed, CeedQFunctionContextCreate(honee->ceed, &stg_qfctx));
  PetscCallCeed(ceed, CeedQFunctionContextSetData(stg_qfctx, CEED_MEM_HOST, CEED_USE_POINTER, global_stg_ctx->total_bytes, global_stg_ctx));
  PetscCallCeed(ceed, CeedQFunctionContextSetDataDestroy(stg_qfctx, CEED_MEM_HOST, FreeContextPetsc));
  PetscCallCeed(ceed, CeedQFunctionContextRegisterDouble(stg_qfctx, "solution time", offsetof(struct STGShur14Context_, time), 1,
                                                         "Physical time of the solution"));

  // Replace the problem's initial condition QFunction with the STG one
  PetscCallCeed(ceed, CeedQFunctionContextDestroy(&problem->ics.qfctx));
  problem->ics = (HoneeQFSpec){.qf_func_ptr = ICsStg, .qf_loc = ICsStg_loc, .qfctx = stg_qfctx};

  if (use_stgstrong) {
    // Use default boundary integral QF (BoundaryIntegral) in newtonian.h
    problem->use_strong_bc_ceed = PETSC_TRUE;
    problem->set_bc_from_ics    = PETSC_FALSE;
  } else {
    for (PetscCount b = 0; b < problem->num_bc_defs; b++) {
      BCDefinition bc_def = problem->bc_defs[b];
      const char  *name;

      PetscCall(BCDefinitionGetInfo(bc_def, &name, NULL, NULL));
      if (!strcmp(name, "inflow")) {
        HoneeBCStruct honee_bc;

        // Each inflow BC holds its own reference to the shared STG QFunction context
        PetscCall(PetscNew(&honee_bc));
        PetscCallCeed(ceed, CeedQFunctionContextReferenceCopy(stg_qfctx, &honee_bc->qfctx));
        honee_bc->honee              = honee;
        honee_bc->num_comps_jac_data = honee->phys->implicit ? 11 : 0;
        PetscCall(BCDefinitionSetContext(bc_def, (PetscCtxDestroyFn *)HoneeBCDestroy, honee_bc));

        PetscCall(BCDefinitionSetIFunction(bc_def, STGWeakInflowBCSetup_CreateIFunctionQF, HoneeBCAddIFunctionOp));
        PetscCall(BCDefinitionSetIJacobian(bc_def, STGWeakInflowBCSetup_CreateIJacobianQF, HoneeBCAddIJacobianOp));
      }
    }
    problem->set_bc_from_ics = PETSC_TRUE;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * @brief Mark the strongly enforced STG components as essential on every boundary definition named "inflow"
 *
 * Four components are constrained; which ones depends on the state variable set:
 *  - STATEVAR_CONSERVATIVE: {0,1,2,3} (rho, rho*u, rho*v, rho*w)
 *  - STATEVAR_PRIMITIVE:    {1,2,3,4} (u, v, w, T)
 *  - STATEVAR_ENTROPY:      {1,2,3,4}
 *
 * @param[in]     dm      DM for the program (not used by this function)
 * @param[in,out] problem ProblemData whose "inflow" boundary definitions are updated
 * @param[in]     phys    Physics object providing `state_var`
 */
PetscErrorCode SetupStrongStg(DM dm, ProblemData problem, Physics phys) {
  const PetscInt num_comps = 4;
  PetscInt       comps[4];

  PetscFunctionBeginUser;
  switch (phys->state_var) {
    case STATEVAR_CONSERVATIVE:
      // {0,1,2,3} for rho, rho*u, rho*v, rho*w
      for (PetscInt i = 0; i < num_comps; i++) comps[i] = i;
      break;

    case STATEVAR_PRIMITIVE:  // {1,2,3,4} for u, v, w, T
    case STATEVAR_ENTROPY:    // {1,2,3,4}
      for (PetscInt i = 0; i < num_comps; i++) comps[i] = i + 1;
      break;
  }

  for (PetscCount b = 0; b < problem->num_bc_defs; b++) {
    BCDefinition bc_def = problem->bc_defs[b];
    const char  *name;

    PetscCall(BCDefinitionGetInfo(bc_def, &name, NULL, NULL));
    if (!strcmp(name, "inflow")) PetscCall(BCDefinitionSetEssential(bc_def, num_comps, comps));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * @brief Create the QFunction (StgShur14InflowStrongQF) used for the strongly enforced STG inflow
 *
 * Inputs "dXdx", "x", "scale", and "stg data" and output "q" are all registered with CEED_EVAL_NONE.
 * The QFunction context is set to `problem->ics.qfctx`.
 *
 * @param[in]  ceed          Ceed object
 * @param[in]  problem       ProblemData providing the QFunction context
 * @param[in]  num_comp_x    Size of the "x" input
 * @param[in]  num_comp_q    Size of the "q" output
 * @param[in]  stg_data_size Size of the "stg data" input
 * @param[in]  dXdx_size     Size of the "dXdx" input
 * @param[out] qf_strongbc   Created QFunction
 */
PetscErrorCode SetupStrongStg_QF(Ceed ceed, ProblemData problem, CeedInt num_comp_x, CeedInt num_comp_q, CeedInt stg_data_size, CeedInt dXdx_size,
                                 CeedQFunction *qf_strongbc) {
  CeedQFunction qf;

  PetscFunctionBeginUser;
  PetscCallCeed(ceed, CeedQFunctionCreateInterior(ceed, 1, StgShur14InflowStrongQF, StgShur14InflowStrongQF_loc, qf_strongbc));
  qf = *qf_strongbc;  // Local alias of the handle just written to the output argument

  // Inputs
  PetscCallCeed(ceed, CeedQFunctionAddInput(qf, "dXdx", dXdx_size, CEED_EVAL_NONE));
  PetscCallCeed(ceed, CeedQFunctionAddInput(qf, "x", num_comp_x, CEED_EVAL_NONE));
  PetscCallCeed(ceed, CeedQFunctionAddInput(qf, "scale", 1, CEED_EVAL_NONE));
  PetscCallCeed(ceed, CeedQFunctionAddInput(qf, "stg data", stg_data_size, CEED_EVAL_NONE));
  // Output
  PetscCallCeed(ceed, CeedQFunctionAddOutput(qf, "q", num_comp_q, CEED_EVAL_NONE));

  PetscCallCeed(ceed, CeedQFunctionSetContext(qf, problem->ics.qfctx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * @brief Create the QFunction (StgShur14Preprocess) that produces the "stg data" field for the strongly enforced STG inflow
 *
 * Inputs "dXdx" and "x" and output "stg data" are all registered with CEED_EVAL_NONE.
 * The QFunction context is set to `problem->ics.qfctx`.
 *
 * @param[in]  ceed          Ceed object
 * @param[in]  problem       ProblemData providing the QFunction context
 * @param[in]  num_comp_x    Size of the "x" input
 * @param[in]  stg_data_size Size of the "stg data" output
 * @param[in]  dXdx_size     Size of the "dXdx" input
 * @param[out] qf_strongbc   Created QFunction
 */
PetscErrorCode SetupStrongStg_PreProcessing(Ceed ceed, ProblemData problem, CeedInt num_comp_x, CeedInt stg_data_size, CeedInt dXdx_size,
                                            CeedQFunction *qf_strongbc) {
  CeedQFunction qf;

  PetscFunctionBeginUser;
  PetscCallCeed(ceed, CeedQFunctionCreateInterior(ceed, 1, StgShur14Preprocess, StgShur14Preprocess_loc, qf_strongbc));
  qf = *qf_strongbc;  // Local alias of the handle just written to the output argument

  // Inputs
  PetscCallCeed(ceed, CeedQFunctionAddInput(qf, "dXdx", dXdx_size, CEED_EVAL_NONE));
  PetscCallCeed(ceed, CeedQFunctionAddInput(qf, "x", num_comp_x, CEED_EVAL_NONE));
  // Output
  PetscCallCeed(ceed, CeedQFunctionAddOutput(qf, "stg data", stg_data_size, CEED_EVAL_NONE));

  PetscCallCeed(ceed, CeedQFunctionSetContext(qf, problem->ics.qfctx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
