// SPDX-FileCopyrightText: Copyright (c) 2017-2024, HONEE contributors.
// SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause
#pragma once

#include <ceed/types.h>
#ifndef CEED_RUNNING_JIT_PASS
#include <math.h>
#endif

#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

// @brief Larger of two scalars; `a` is returned whenever `a < b` is false
CEED_QFUNCTION_HELPER CeedScalar Max(CeedScalar a, CeedScalar b) {
  if (a < b) return b;
  return a;
}
// @brief Smaller of two scalars; `b` is returned whenever `a < b` is false
CEED_QFUNCTION_HELPER CeedScalar Min(CeedScalar a, CeedScalar b) {
  if (a < b) return a;
  return b;
}

// @brief Exchange the values stored at `a` and `b`
CEED_QFUNCTION_HELPER void SwapScalar(CeedScalar *a, CeedScalar *b) {
  const CeedScalar old_b = *b;

  *b = *a;
  *a = old_b;
}

// @brief x^2
CEED_QFUNCTION_HELPER CeedScalar Square(CeedScalar x) {
  const CeedScalar squared = x * x;

  return squared;
}
// @brief x^3, evaluated as (x * x) * x
CEED_QFUNCTION_HELPER CeedScalar Cube(CeedScalar x) {
  const CeedScalar cubed = x * x * x;

  return cubed;
}

// @brief Multiply each of the N entries of u by alpha, in place
CEED_QFUNCTION_HELPER void ScaleN(CeedScalar *u, const CeedScalar alpha, const CeedInt N) {
  CeedPragmaSIMD for (CeedInt k = 0; k < N; k++) {
    u[k] = u[k] * alpha;
  }
}

// @brief Overwrite each of the N entries of u with alpha
CEED_QFUNCTION_HELPER void SetValueN(CeedScalar *u, const CeedScalar alpha, const CeedInt N) {
  CeedPragmaSIMD for (CeedInt k = 0; k < N; k++) {
    u[k] = alpha;
  }
}

// @brief Copy the first N entries of x into y (element by element, in increasing index order)
CEED_QFUNCTION_HELPER void CopyN(const CeedScalar *x, CeedScalar *y, const CeedInt N) {
  CeedPragmaSIMD for (CeedInt k = 0; k < N; k++) {
    y[k] = x[k];
  }
}

// @brief Copy all 9 entries of the 3x3 matrix A into B
CEED_QFUNCTION_HELPER void CopyMat3(const CeedScalar A[3][3], CeedScalar B[3][3]) {
  // Treat both matrices as flat, row-major arrays of 9 scalars
  const CeedScalar *src = (const CeedScalar *)A;
  CeedScalar       *dst = (CeedScalar *)B;

  CopyN(src, dst, 9);
}

// @brief Dot product of two vectors with N entries each; returns 0 when N <= 0
CEED_QFUNCTION_HELPER CeedScalar DotN(const CeedScalar *u, const CeedScalar *v, const CeedInt N) {
  CeedScalar sum = 0;

  CeedPragmaSIMD for (CeedInt k = 0; k < N; k++) {
    sum += u[k] * v[k];
  }
  return sum;
}

// @brief y = \alpha x + y for vectors with N entries; y is updated in place
CEED_QFUNCTION_HELPER void AXPY(CeedScalar alpha, const CeedScalar *x, CeedScalar *y, CeedInt N) {
  CeedPragmaSIMD for (CeedInt k = 0; k < N; k++) {
    y[k] = y[k] + alpha * x[k];
  }
}

// @brief Dot product of two vectors with 3 entries each
CEED_QFUNCTION_HELPER CeedScalar Dot3(const CeedScalar *u, const CeedScalar *v) {
  const CeedScalar dot = u[0] * v[0] + u[1] * v[1] + u[2] * v[2];

  return dot;
}

// @brief Dot product of two vectors with 2 entries each
CEED_QFUNCTION_HELPER CeedScalar Dot2(const CeedScalar *u, const CeedScalar *v) {
  const CeedScalar dot = u[0] * v[0] + u[1] * v[1];

  return dot;
}

// @brief \ell^2 (Euclidean) norm of a vector with 3 entries
CEED_QFUNCTION_HELPER CeedScalar Norm3(const CeedScalar *u) {
  const CeedScalar sum_of_squares = u[0] * u[0] + u[1] * u[1] + u[2] * u[2];

  return sqrt(sum_of_squares);
}

// @brief \ell^2 (Euclidean) norm of a vector with 2 entries
CEED_QFUNCTION_HELPER CeedScalar Norm2(const CeedScalar *u) {
  const CeedScalar sum_of_squares = u[0] * u[0] + u[1] * u[1];

  return sqrt(sum_of_squares);
}

// @brief Cross product w = u x v of vectors with 3 entries
// @details The entries of w are written one at a time while u and v are still being read, so w should not alias u or v.
CEED_QFUNCTION_HELPER void Cross3(const CeedScalar u[3], const CeedScalar v[3], CeedScalar w[3]) {
  w[0] = u[1] * v[2] - u[2] * v[1];
  w[1] = u[2] * v[0] - u[0] * v[2];
  w[2] = u[0] * v[1] - u[1] * v[0];
}

// @brief Curl of a 3-component vector field given its gradient
// @details Each output entry is the difference of two off-diagonal gradient entries: v[0] = g[2][1] - g[1][2], v[1] = g[0][2] - g[2][0],
//          v[2] = g[1][0] - g[0][1]. This is the curl when gradient[i][j] holds the derivative of component i in direction j.
CEED_QFUNCTION_HELPER void Curl3(const CeedScalar gradient[3][3], CeedScalar v[3]) {
  const CeedScalar(*g)[3] = gradient;

  v[0] = g[2][1] - g[1][2];
  v[1] = g[0][2] - g[2][0];
  v[2] = g[1][0] - g[0][1];
}

// @brief Matrix vector product accumulated into b, b = op(A) x + b. A is stored row-major as NxM.
// @details With CEED_NOTRANSPOSE, op(A) = A, x has M entries and b has N entries.
//          With CEED_TRANSPOSE, op(A) = A^T, x has N entries and b has M entries.
//          b is never zeroed here; any other value of transpose_A leaves b untouched.
CEED_QFUNCTION_HELPER void MatVecNM(const CeedScalar *A, const CeedScalar *x, const CeedInt N, const CeedInt M, const CeedTransposeMode transpose_A,
                                    CeedScalar *b) {
  if (transpose_A == CEED_NOTRANSPOSE) {
    // Row `row` of A starts at A[row * M]
    CeedPragmaSIMD for (CeedInt row = 0; row < N; row++) {
      b[row] += DotN(&A[row * M], x, M);
    }
  } else if (transpose_A == CEED_TRANSPOSE) {
    // Column `col` of A acts as row `col` of A^T
    CeedPragmaSIMD for (CeedInt col = 0; col < M; col++) {
      CeedPragmaSIMD for (CeedInt row = 0; row < N; row++) {
        b[col] += A[row * M + col] * x[row];
      }
    }
  }
}

// @brief 3x3 Matrix vector product accumulated into b, b = Ax + b (or b = A^T x + b when transpose_A is CEED_TRANSPOSE)
CEED_QFUNCTION_HELPER void MatVec3(const CeedScalar A[3][3], const CeedScalar x[3], const CeedTransposeMode transpose_A, CeedScalar b[3]) {
  const CeedInt size = 3;

  MatVecNM(&A[0][0], x, size, size, transpose_A, b);
}

// @brief 2x2 Matrix vector product accumulated into b, b = Ax + b (or b = A^T x + b when transpose_A is CEED_TRANSPOSE)
CEED_QFUNCTION_HELPER void MatVec2(const CeedScalar A[2][2], const CeedScalar x[2], const CeedTransposeMode transpose_A, CeedScalar b[2]) {
  const CeedInt size = 2;

  MatVecNM(&A[0][0], x, size, size, transpose_A, b);
}

// @brief Matrix-Matrix product accumulated into B, B = D op(A) + B, where D is diagonal and stored as a vector.
// @details A is stored row-major as NxM.
//          With CEED_NOTRANSPOSE, op(A) = A, D has N entries and B is NxM.
//          With CEED_TRANSPOSE, op(A) = A^T, D has M entries and B is MxN.
//          B is never zeroed here; any other value of transpose_A leaves B untouched.
CEED_QFUNCTION_HELPER void MatDiagNM(const CeedScalar *A, const CeedScalar *D, const CeedInt N, const CeedInt M, const CeedTransposeMode transpose_A,
                                     CeedScalar *B) {
  if (transpose_A == CEED_NOTRANSPOSE) {
    CeedPragmaSIMD for (CeedInt row = 0; row < N; row++) {
      CeedPragmaSIMD for (CeedInt col = 0; col < M; col++) {
        B[row * M + col] += D[row] * A[row * M + col];
      }
    }
  } else if (transpose_A == CEED_TRANSPOSE) {
    // (row, col) index B and A^T, so A itself is read at (col, row)
    CeedPragmaSIMD for (CeedInt row = 0; row < M; row++) {
      CeedPragmaSIMD for (CeedInt col = 0; col < N; col++) {
        B[row * N + col] += D[row] * A[col * M + row];
      }
    }
  }
}

// @brief 3x3 Matrix-Matrix product accumulated into B, B = DA + B, where D is diagonal and given by its 3 diagonal entries.
// @details A is replaced by A^T when transpose_A is CEED_TRANSPOSE.
CEED_QFUNCTION_HELPER void MatDiag3(const CeedScalar A[3][3], const CeedScalar D[3], const CeedTransposeMode transpose_A, CeedScalar B[3][3]) {
  const CeedInt size = 3;

  MatDiagNM(&A[0][0], D, size, size, transpose_A, &B[0][0]);
}
// @brief NxN Matrix-Matrix product accumulated into C, C = op(A) op(B) + C
// @details All matrices are row-major NxN. op(X) is X for CEED_NOTRANSPOSE and X^T for CEED_TRANSPOSE.
//          C is never zeroed here. If either transpose mode is neither of those two values, C is left untouched.
CEED_QFUNCTION_HELPER void MatMatN(const CeedScalar *A, const CeedScalar *B, const CeedInt N, const CeedTransposeMode transpose_A,
                                   const CeedTransposeMode transpose_B, CeedScalar *C) {
  if (transpose_A != CEED_NOTRANSPOSE && transpose_A != CEED_TRANSPOSE) return;
  if (transpose_B != CEED_NOTRANSPOSE && transpose_B != CEED_TRANSPOSE) return;

  // op(A)[i][k] lives at A[i * a_stride_i + k * a_stride_k]; transposing swaps the two strides
  const CeedInt a_stride_i = transpose_A == CEED_TRANSPOSE ? 1 : N;
  const CeedInt a_stride_k = transpose_A == CEED_TRANSPOSE ? N : 1;
  // op(B)[k][j] lives at B[k * b_stride_k + j * b_stride_j]
  const CeedInt b_stride_k = transpose_B == CEED_TRANSPOSE ? 1 : N;
  const CeedInt b_stride_j = transpose_B == CEED_TRANSPOSE ? N : 1;

  CeedPragmaSIMD for (CeedInt i = 0; i < N; i++) {
    CeedPragmaSIMD for (CeedInt j = 0; j < N; j++) {
      CeedPragmaSIMD for (CeedInt k = 0; k < N; k++) {
        C[i * N + j] += A[i * a_stride_i + k * a_stride_k] * B[k * b_stride_k + j * b_stride_j];
      }
    }
  }
}

// @brief 3x3 Matrix-Matrix product accumulated into C, C = AB + C, with A and/or B optionally transposed
CEED_QFUNCTION_HELPER void MatMat3(const CeedScalar A[3][3], const CeedScalar B[3][3], const CeedTransposeMode transpose_A,
                                   const CeedTransposeMode transpose_B, CeedScalar C[3][3]) {
  const CeedInt size = 3;

  MatMatN(&A[0][0], &B[0][0], size, transpose_A, transpose_B, &C[0][0]);
}

// @brief 2x2 Matrix-Matrix product accumulated into C, C = AB + C, with A and/or B optionally transposed
CEED_QFUNCTION_HELPER void MatMat2(const CeedScalar A[2][2], const CeedScalar B[2][2], const CeedTransposeMode transpose_A,
                                   const CeedTransposeMode transpose_B, CeedScalar C[2][2]) {
  const CeedInt size = 2;

  MatMatN(&A[0][0], &B[0][0], size, transpose_A, transpose_B, &C[0][0]);
}

/**
 * @brief Calculate inverse of 2x2 matrix
 *
 * The inverse is the adjugate divided by the determinant. No check for a zero determinant is made.
 * Entries of `A_inv` are written while `A` is still being read, so `A_inv` should not alias `A`.
 *
 * @param[in]  A        Input matrix
 * @param[out] A_inv    Output matrix inverse
 * @param[out] detJ_ptr Determinant of A, may be NULL if not desired
 */
CEED_QFUNCTION_HELPER void MatInv2(const CeedScalar A[2][2], CeedScalar A_inv[2][2], CeedScalar *detJ_ptr) {
  const CeedScalar det = A[0][0] * A[1][1] - A[1][0] * A[0][1];

  // Adjugate: swap the diagonal entries, negate the off-diagonal entries
  A_inv[0][0] = A[1][1] / det;
  A_inv[0][1] = -A[0][1] / det;
  A_inv[1][0] = -A[1][0] / det;
  A_inv[1][1] = A[0][0] / det;

  if (detJ_ptr) {
    *detJ_ptr = det;
  }
}

/**
 * @brief Calculate inverse of 3x3 matrix
 *
 * The adjugate of `A` is written into `A_inv`, the determinant is formed from the first column of `A` and the first row of the adjugate,
 * and `A_inv` is then scaled by the reciprocal of the determinant. No check for a zero determinant is made.
 * `A` is read after `A_inv` has been written, so `A_inv` should not alias `A`.
 *
 * @param[in]  A        Input matrix
 * @param[out] A_inv    Output matrix inverse
 * @param[out] detJ_ptr Determinant of A, may be NULL if not desired
 */
CEED_QFUNCTION_HELPER void MatInv3(const CeedScalar A[3][3], CeedScalar A_inv[3][3], CeedScalar *detJ_ptr) {
  // Row 0 of the adjugate
  A_inv[0][0] = A[1][1] * A[2][2] - A[1][2] * A[2][1];
  A_inv[0][1] = A[0][2] * A[2][1] - A[0][1] * A[2][2];
  A_inv[0][2] = A[0][1] * A[1][2] - A[0][2] * A[1][1];
  // Row 1 of the adjugate
  A_inv[1][0] = A[1][2] * A[2][0] - A[1][0] * A[2][2];
  A_inv[1][1] = A[0][0] * A[2][2] - A[0][2] * A[2][0];
  A_inv[1][2] = A[0][2] * A[1][0] - A[0][0] * A[1][2];
  // Row 2 of the adjugate
  A_inv[2][0] = A[1][0] * A[2][1] - A[1][1] * A[2][0];
  A_inv[2][1] = A[0][1] * A[2][0] - A[0][0] * A[2][1];
  A_inv[2][2] = A[0][0] * A[1][1] - A[0][1] * A[1][0];

  // Cofactor expansion along the first column of A
  const CeedScalar det     = A[0][0] * A_inv[0][0] + A[1][0] * A_inv[0][1] + A[2][0] * A_inv[0][2];
  const CeedScalar inv_det = 1 / det;

  ScaleN((CeedScalar *)A_inv, inv_det, 9);
  if (detJ_ptr) {
    *detJ_ptr = det;
  }
}

/**
  @brief NxM Matrix-Matrix product accumulated into C, C = AB + C

  C is NxM, A is NxP, B is PxM. `mat_C` is not zeroed by this function.

  @param[in]     mat_A Row-major matrix `A`
  @param[in]     mat_B Row-major matrix `B`
  @param[in,out] mat_C Row-major matrix `C`, the product is added to its existing contents
  @param[in]     N     Number of rows of `C`
  @param[in]     M     Number of columns of `C`
  @param[in]     P     Number of columns of `A`/rows of `B`
**/
CEED_QFUNCTION_HELPER void MatMatNM(const CeedScalar *mat_A, const CeedScalar *mat_B, CeedScalar *mat_C, CeedInt N, CeedInt M, CeedInt P) {
  for (CeedInt row = 0; row < N; row++) {
    for (CeedInt col = 0; col < M; col++) {
      const CeedInt c_index = row * M + col;

      for (CeedInt inner = 0; inner < P; inner++) {
        mat_C[c_index] += mat_A[row * P + inner] * mat_B[inner * M + col];
      }
    }
  }
}

// @brief Unpack Kelvin-Mandel notation symmetric tensor into full tensor
// @details v[0..2] are the diagonal entries; v[3], v[4], v[5] are the (1,2), (0,2), (0,1) entries, each scaled by 1/sqrt(2) on unpacking
//          and written to both symmetric positions.
CEED_QFUNCTION_HELPER void KMUnpack(const CeedScalar v[6], CeedScalar A[3][3]) {
  const CeedScalar inv_sqrt_2 = 1 / sqrt(2.);
  const CeedScalar a_12       = inv_sqrt_2 * v[3];
  const CeedScalar a_02       = inv_sqrt_2 * v[4];
  const CeedScalar a_01       = inv_sqrt_2 * v[5];

  // Diagonal
  A[0][0] = v[0];
  A[1][1] = v[1];
  A[2][2] = v[2];
  // Symmetric off-diagonal pairs
  A[1][2] = a_12;
  A[2][1] = a_12;
  A[0][2] = a_02;
  A[2][0] = a_02;
  A[0][1] = a_01;
  A[1][0] = a_01;
}

// @brief Pack full tensor into Kelvin-Mandel notation symmetric tensor
// @details Only the diagonal and the lower triangle of A are read; the off-diagonal entries (2,1), (2,0), (1,0) are scaled by sqrt(2)
//          and stored in v[3], v[4], v[5] respectively.
CEED_QFUNCTION_HELPER void KMPack(const CeedScalar A[3][3], CeedScalar v[6]) {
  const CeedScalar sqrt_2 = sqrt(2.);

  // Diagonal
  v[0] = A[0][0];
  v[1] = A[1][1];
  v[2] = A[2][2];
  // Scaled off-diagonal
  v[3] = A[2][1] * sqrt_2;
  v[4] = A[2][0] * sqrt_2;
  v[5] = A[1][0] * sqrt_2;
}

// @brief Calculate metric tensor from mapping, g_{ij} = xi_{k,i} xi_{k,j} = dXdx^T dXdx
// @details The result is returned in Kelvin-Mandel notation (see KMPack).
CEED_QFUNCTION_HELPER void KMMetricTensor(const CeedScalar dXdx[3][3], CeedScalar km_g_ij[6]) {
  // MatMat3 accumulates into its output, so the full tensor must start at zero
  CeedScalar metric[3][3] = {{0.}};

  MatMat3(dXdx, dXdx, CEED_TRANSPOSE, CEED_NOTRANSPOSE, metric);
  KMPack(metric, km_g_ij);
}

/**
  @brief Linear ramp evaluation from set amplitude to zero

      ▲
      │
     a│-------┬.
      │       ┊ `-.
      │       ┊    `-.
      │       ┊       `-.______
      └───────┴─────────┴────────> x
              s        s+l

  where "a" is `amplitude`, "s" is `start`, and "l" is `length`.

  The value is `amplitude` for `x < start`, decreases linearly on `start <= x < start + length`, and is 0 otherwise.

  @param[in] amplitude Maximum value of the ramp
  @param[in] length    Length of the ramp
  @param[in] start     Location where ramp begins to reduce from `amplitude` to 0
  @param[in] x         Input location
  @return Value of linear ramp function
**/
CEED_QFUNCTION_HELPER CeedScalar LinearRampCoefficient(CeedScalar amplitude, CeedScalar length, CeedScalar start, CeedScalar x) {
  // Plateau before the ramp
  if (x < start) return amplitude;

  // On the ramp itself
  if (x < start + length) return amplitude * ((x - start) * (-1 / length) + 1);

  // Past the end of the ramp
  return 0;
}

/**
  @brief Pack stored values at quadrature point

  Component `j` of `values_at_qpnt` is written to `stored[(start + j) * Q + i]`.

  @param[in]   Q              Number of quadrature points
  @param[in]   i              Current quadrature point
  @param[in]   start          Starting index to store components
  @param[in]   num_comp       Number of components to store
  @param[in]   values_at_qpnt Local values for quadrature point i
  @param[out]  stored         Stored values

  @return An error code: 0 - success, otherwise - failure
**/
CEED_QFUNCTION_HELPER int StoredValuesPack(CeedInt Q, CeedInt i, CeedInt start, CeedInt num_comp, const CeedScalar *values_at_qpnt,
                                           CeedScalar *stored) {
  for (CeedInt comp = 0; comp < num_comp; comp++) {
    const CeedInt stored_comp = start + comp;

    stored[stored_comp * Q + i] = values_at_qpnt[comp];
  }
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Unpack stored values at quadrature point

  Component `j` of `values_at_qpnt` is read from `stored[(start + j) * Q + i]`.

  @param[in]   Q              Number of quadrature points
  @param[in]   i              Current quadrature point
  @param[in]   start          Starting index of the stored components
  @param[in]   num_comp       Number of components to unpack
  @param[in]   stored         Stored values
  @param[out]  values_at_qpnt Local values for quadrature point i

  @return An error code: 0 - success, otherwise - failure
**/
CEED_QFUNCTION_HELPER int StoredValuesUnpack(CeedInt Q, CeedInt i, CeedInt start, CeedInt num_comp, const CeedScalar *stored,
                                             CeedScalar *values_at_qpnt) {
  for (CeedInt comp = 0; comp < num_comp; comp++) {
    const CeedInt stored_comp = start + comp;

    values_at_qpnt[comp] = stored[stored_comp * Q + i];
  }
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Unpack N-D element q_data at quadrature point

  Component 0 of `q_data` is `wdetJ`, followed by the `dim * dim` components of `dXdx`.
  Only `dim` equal to 2 or 3 is handled; for any other value nothing is unpacked.

  @param[in]   dim       Dimension of the element
  @param[in]   Q         Number of quadrature points
  @param[in]   i         Current quadrature point
  @param[in]   q_data    Pointer to q_data (generated by `setupgeo.h:Setup`)
  @param[out]  wdetJ     Quadrature weight times determinant of the mapping Jacobian, or `NULL`
  @param[out]  dXdx      Inverse of the mapping Jacobian (shape [dim][dim]), or `NULL`

  @return An error code: 0 - success, otherwise - failure
**/
CEED_QFUNCTION_HELPER int QdataUnpack_ND(CeedInt dim, CeedInt Q, CeedInt i, const CeedScalar *q_data, CeedScalar *wdetJ, CeedScalar *dXdx) {
  if (dim != 2 && dim != 3) return CEED_ERROR_SUCCESS;

  if (wdetJ) StoredValuesUnpack(Q, i, 0, 1, q_data, wdetJ);
  if (dXdx) StoredValuesUnpack(Q, i, 1, dim * dim, q_data, dXdx);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Unpack boundary element q_data for N-D problem at quadrature point

  Component 0 of `q_data` is `wdetJ`, followed by the `dim` components of `normal`.
  For `dim == 3` only, the 6 components of `dXdx` follow the normal; for `dim == 2`, `dXdx` is ignored.
  Only `dim` equal to 2 or 3 is handled; for any other value nothing is unpacked.

  @param[in]   dim       Dimension of the element
  @param[in]   Q         Number of quadrature points
  @param[in]   i         Current quadrature point
  @param[in]   q_data    Pointer to q_data (generated by `setupgeo.h:SetupBoundary`)
  @param[out]  wdetJ     Quadrature weight times determinant of the mapping Jacobian, or `NULL`
  @param[out]  dXdx      Inverse of the mapping Jacobian (shape [dim - 1][dim]), or `NULL`
  @param[out]  normal    Components of the normal vector (shape [dim]), or `NULL`

  @return An error code: 0 - success, otherwise - failure
**/
CEED_QFUNCTION_HELPER int QdataBoundaryUnpack_ND(CeedInt dim, CeedInt Q, CeedInt i, const CeedScalar *q_data, CeedScalar *wdetJ, CeedScalar *dXdx,
                                                 CeedScalar *normal) {
  if (dim != 2 && dim != 3) return CEED_ERROR_SUCCESS;

  if (wdetJ) StoredValuesUnpack(Q, i, 0, 1, q_data, wdetJ);
  if (normal) StoredValuesUnpack(Q, i, 1, dim, q_data, normal);
  // dXdx is only unpacked for 3D elements
  if (dim == 3 && dXdx) StoredValuesUnpack(Q, i, 4, 6, q_data, dXdx);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Unpack boundary element q_data (with full inverse Jacobian) for N-D problem at quadrature point

  Component 0 of `q_data` is `wdetJ`, followed by the `dim * dim` components of `dXdx`, followed by the `dim` components of `normal`.
  Only `dim` equal to 2 or 3 is handled; for any other value nothing is unpacked.

  @param[in]   dim       Dimension of the element
  @param[in]   Q         Number of quadrature points
  @param[in]   i         Current quadrature point
  @param[in]   q_data    Pointer to q_data (generated by `setupgeo.h:SetupBoundaryGradient`)
  @param[out]  wdetJ     Quadrature weight times determinant of the mapping Jacobian, or `NULL`
  @param[out]  dXdx      Inverse of the mapping Jacobian (shape [dim][dim]), or `NULL`
  @param[out]  normal    Components of the normal vector (shape [dim]), or `NULL`

  @return An error code: 0 - success, otherwise - failure
**/
CEED_QFUNCTION_HELPER int QdataBoundaryGradientUnpack_ND(CeedInt dim, CeedInt Q, CeedInt i, const CeedScalar *q_data, CeedScalar *wdetJ,
                                                         CeedScalar *dXdx, CeedScalar *normal) {
  if (dim != 2 && dim != 3) return CEED_ERROR_SUCCESS;

  const CeedInt dXdx_size    = dim * dim;
  const CeedInt normal_start = 1 + dXdx_size;

  if (wdetJ) StoredValuesUnpack(Q, i, 0, 1, q_data, wdetJ);
  if (dXdx) StoredValuesUnpack(Q, i, 1, dXdx_size, q_data, dXdx);
  if (normal) StoredValuesUnpack(Q, i, normal_start, dim, q_data, normal);
  return CEED_ERROR_SUCCESS;
}

/**
  @brief Unpack 3D element q_data at quadrature point

  Thin wrapper around `QdataUnpack_ND()` with `dim = 3`.

  @param[in]   Q         Number of quadrature points
  @param[in]   i         Current quadrature point
  @param[in]   q_data    Pointer to q_data (generated by `setupgeo.h:Setup`)
  @param[out]  wdetJ     Quadrature weight times determinant of the mapping Jacobian
  @param[out]  dXdx      Inverse of the mapping Jacobian (shape [3][3])

  @return An error code: 0 - success, otherwise - failure
**/
CEED_QFUNCTION_HELPER int QdataUnpack_3D(CeedInt Q, CeedInt i, const CeedScalar *q_data, CeedScalar *wdetJ, CeedScalar dXdx[3][3]) {
  CeedScalar *dXdx_flat = (CeedScalar *)dXdx;

  return QdataUnpack_ND(3, Q, i, q_data, wdetJ, dXdx_flat);
}

/**
  @brief Unpack boundary element q_data for 3D problem at quadrature point

  Thin wrapper around `QdataBoundaryUnpack_ND()` with `dim = 3`.

  @param[in]   Q         Number of quadrature points
  @param[in]   i         Current quadrature point
  @param[in]   q_data    Pointer to q_data (generated by `setupgeo.h:SetupBoundary`)
  @param[out]  wdetJ     Quadrature weight times determinant of the mapping Jacobian, or `NULL`
  @param[out]  dXdx      Inverse of the mapping Jacobian (shape [2][3]), or `NULL`
  @param[out]  normal    Components of the normal vector (shape [3]), or `NULL`

  @return An error code: 0 - success, otherwise - failure
**/
CEED_QFUNCTION_HELPER int QdataBoundaryUnpack_3D(CeedInt Q, CeedInt i, const CeedScalar *q_data, CeedScalar *wdetJ, CeedScalar dXdx[2][3],
                                                 CeedScalar normal[3]) {
  CeedScalar *dXdx_flat = (CeedScalar *)dXdx;

  return QdataBoundaryUnpack_ND(3, Q, i, q_data, wdetJ, dXdx_flat, normal);
}

/**
  @brief Unpack boundary element q_data (with full inverse Jacobian) for 3D problem at quadrature point

  Thin wrapper around `QdataBoundaryGradientUnpack_ND()` with `dim = 3`.

  @param[in]   Q         Number of quadrature points
  @param[in]   i         Current quadrature point
  @param[in]   q_data    Pointer to q_data, in the layout read by `QdataBoundaryGradientUnpack_ND()`
  @param[out]  wdetJ     Quadrature weight times determinant of the mapping Jacobian, or `NULL`
  @param[out]  dXdx      Inverse of the mapping Jacobian (shape [3][3]), or `NULL`
  @param[out]  normal    Components of the normal vector (shape [3]), or `NULL`

  @return An error code: 0 - success, otherwise - failure
**/
CEED_QFUNCTION_HELPER int QdataBoundaryGradientUnpack_3D(CeedInt Q, CeedInt i, const CeedScalar *q_data, CeedScalar *wdetJ, CeedScalar dXdx[3][3],
                                                         CeedScalar normal[3]) {
  CeedScalar *dXdx_flat = (CeedScalar *)dXdx;

  return QdataBoundaryGradientUnpack_ND(3, Q, i, q_data, wdetJ, dXdx_flat, normal);
}

/**
  @brief Unpack 2D element q_data at quadrature point

  Thin wrapper around `QdataUnpack_ND()` with `dim = 2`.

  @param[in]   Q         Number of quadrature points
  @param[in]   i         Current quadrature point
  @param[in]   q_data    Pointer to q_data (generated by `setupgeo.h:Setup`)
  @param[out]  wdetJ     Quadrature weight times determinant of the mapping Jacobian
  @param[out]  dXdx      Inverse of the mapping Jacobian (shape [2][2])

  @return An error code: 0 - success, otherwise - failure
**/
CEED_QFUNCTION_HELPER int QdataUnpack_2D(CeedInt Q, CeedInt i, const CeedScalar *q_data, CeedScalar *wdetJ, CeedScalar dXdx[2][2]) {
  CeedScalar *dXdx_flat = (CeedScalar *)dXdx;

  // QdataUnpack_ND() always reports CEED_ERROR_SUCCESS, so forwarding its result is equivalent to returning success directly
  return QdataUnpack_ND(2, Q, i, q_data, wdetJ, dXdx_flat);
}

/**
  @brief Unpack boundary element q_data for 2D problem at quadrature point

  Thin wrapper around `QdataBoundaryUnpack_ND()` with `dim = 2`, which reads `wdetJ` from component 0 and the 2 components of `normal` from
  components 1-2. No `dXdx` is unpacked for 2D boundary elements.

  @param[in]   Q         Number of quadrature points
  @param[in]   i         Current quadrature point
  @param[in]   q_data    Pointer to q_data (generated by `setupgeo.h:SetupBoundary2d`)
  @param[out]  wdetJ     Quadrature weight times determinant of the mapping Jacobian, or `NULL`
  @param[out]  normal    Components of the normal vector (shape [2]), or `NULL`

  @return An error code: 0 - success, otherwise - failure
**/
CEED_QFUNCTION_HELPER int QdataBoundaryUnpack_2D(CeedInt Q, CeedInt i, const CeedScalar *q_data, CeedScalar *wdetJ, CeedScalar normal[2]) {
  // dim must be 2 here: with dim = 3 the N-D routine would write 3 components into the 2-entry `normal` array
  return QdataBoundaryUnpack_ND(2, Q, i, q_data, wdetJ, NULL, normal);
}

/**
  @brief Unpack boundary element q_data (with full inverse Jacobian) for 2D problem at quadrature point

  Thin wrapper around `QdataBoundaryGradientUnpack_ND()` with `dim = 2`.

  @param[in]   Q         Number of quadrature points
  @param[in]   i         Current quadrature point
  @param[in]   q_data    Pointer to q_data, in the layout read by `QdataBoundaryGradientUnpack_ND()`
  @param[out]  wdetJ     Quadrature weight times determinant of the mapping Jacobian, or `NULL`
  @param[out]  dXdx      Inverse of the mapping Jacobian (shape [2][2]), or `NULL`
  @param[out]  normal    Components of the normal vector (shape [2]), or `NULL`

  @return An error code: 0 - success, otherwise - failure
**/
CEED_QFUNCTION_HELPER int QdataBoundaryGradientUnpack_2D(CeedInt Q, CeedInt i, const CeedScalar *q_data, CeedScalar *wdetJ, CeedScalar dXdx[2][2],
                                                         CeedScalar normal[2]) {
  CeedScalar *dXdx_flat = (CeedScalar *)dXdx;

  return QdataBoundaryGradientUnpack_ND(2, Q, i, q_data, wdetJ, dXdx_flat, normal);
}

/**
  @brief Unpack `CEED_EVAL_GRAD` QF input into quadrature-point local array

  Entry `[d][c][i]` of `grad` is copied to entry `[c][d]` of `grad_local`, i.e. the derivative-direction and component axes are swapped.

  @param[in]  Q          Number of quadrature points
  @param[in]  i          Current quadrature point
  @param[in]  num_comp   Number of components of the input
  @param[in]  dim        Topological dimension of the element (ie. number of derivative terms per component)
  @param[in]  grad       QF gradient input, shape `[dim][num_comp][Q]`
  @param[out] grad_local Gradient array at quadrature point `i`, shape `[num_comp][dim]`
**/
CEED_QFUNCTION_HELPER void GradUnpackND(CeedInt Q, CeedInt i, CeedInt num_comp, CeedInt dim, const CeedScalar *grad, CeedScalar *grad_local) {
  // Distance in `grad` between consecutive derivative directions
  const CeedInt dir_stride = Q * num_comp;

  for (CeedInt dir = 0; dir < dim; dir++) {
    for (CeedInt comp = 0; comp < num_comp; comp++) {
      grad_local[dim * comp + dir] = grad[dir_stride * dir + Q * comp + i];
    }
  }
}

/**
  @brief Unpack `CEED_EVAL_GRAD` QF input into quadrature-point local array for 3D elements

  Thin wrapper around `GradUnpackND()` with `dim = 3`.

  @param[in]  Q          Number of quadrature points
  @param[in]  i          Current quadrature point
  @param[in]  num_comp   Number of components of the input
  @param[in]  grad       QF gradient input, shape `[3][num_comp][Q]`
  @param[out] grad_local Gradient array at quadrature point `i`, shape `[num_comp][3]`
**/
CEED_QFUNCTION_HELPER void GradUnpack3D(CeedInt Q, CeedInt i, CeedInt num_comp, const CeedScalar *grad, CeedScalar (*grad_local)[3]) {
  CeedScalar *grad_local_flat = (CeedScalar *)grad_local;

  GradUnpackND(Q, i, num_comp, 3, grad, grad_local_flat);
}

/**
  @brief Unpack `CEED_EVAL_GRAD` QF input into quadrature-point local array for 2D elements

  Thin wrapper around `GradUnpackND()` with `dim = 2`.

  @param[in]  Q          Number of quadrature points
  @param[in]  i          Current quadrature point
  @param[in]  num_comp   Number of components of the input
  @param[in]  grad       QF gradient input, shape `[2][num_comp][Q]`
  @param[out] grad_local Gradient array at quadrature point `i`, shape `[num_comp][2]`
**/
CEED_QFUNCTION_HELPER void GradUnpack2D(CeedInt Q, CeedInt i, CeedInt num_comp, const CeedScalar *grad, CeedScalar (*grad_local)[2]) {
  CeedScalar *grad_local_flat = (CeedScalar *)grad_local;

  GradUnpackND(Q, i, num_comp, 2, grad, grad_local_flat);
}

/**
  @brief Calculate divergence from reference gradient

  Given gradient array G_{ij} and inverse element mapping X_{ij}, then the divergence is

  G_{ij} X_{ji}

  The sum is accumulated into `*divergence`, which is not zeroed here; the caller must initialize it.

  @param[in]     grad_qn    Gradient array, orientation [vector component][gradient direction]
  @param[in]     dXdx       Inverse of the mapping Jacobian (shape [dim][dim])
  @param[in]     dim        Dimension of the problem
  @param[in,out] divergence The divergence, added to the value already stored
**/
CEED_QFUNCTION_HELPER void DivergenceND(const CeedScalar *grad_qn, const CeedScalar *dXdx, const CeedInt dim, CeedScalar *divergence) {
  for (CeedInt comp = 0; comp < dim; comp++) {
    for (CeedInt dir = 0; dir < dim; dir++) {
      // G[comp][dir] * X[dir][comp]
      *divergence += grad_qn[comp * dim + dir] * dXdx[dir * dim + comp];
    }
  }
}

/**
  @brief Calculate divergence from reference gradient for 3D problem

  Given gradient array G_{ij} and inverse element mapping X_{ij}, then the divergence is

  G_{ij} X_{ji}

  Thin wrapper around `DivergenceND()` with `dim = 3`; the sum is added to the value already stored in `*divergence`.

  @param[in]     grad_qn    Gradient array, orientation [vector component][gradient direction]
  @param[in]     dXdx       Inverse of the mapping Jacobian (shape [3][3])
  @param[in,out] divergence The divergence, added to the value already stored
**/
CEED_QFUNCTION_HELPER void Divergence3D(const CeedScalar grad_qn[3][3], const CeedScalar dXdx[3][3], CeedScalar *divergence) {
  const CeedInt dim = 3;

  DivergenceND(&grad_qn[0][0], &dXdx[0][0], dim, divergence);
}
