/*
     Defines the basic matrix operations for sequential dense.
     Portions of this code are under:
     Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
*/

#include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/dense/mpi/mpidense.h>
#include <petscblaslapack.h>
#include <../src/mat/impls/aij/seq/aij.h>
#include <petsc/private/vecimpl.h>

PetscErrorCode MatSeqDenseSymmetrize_Private(Mat A, PetscBool hermitian)
{
  Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
  PetscInt      j, k, n = A->rmap->n;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot symmetrize a rectangular matrix");
  PetscCall(MatDenseGetArray(A, &v));
  if (!hermitian) {
    for (k = 0; k < n; k++) {
      for (j = k; j < n; j++) v[j * mat->lda + k] = v[k * mat->lda + j];
    }
  } else {
    for (k = 0; k < n; k++) {
      for (j = k; j < n; j++) v[j * mat->lda + k] = PetscConj(v[k * mat->lda + j]);
    }
  }
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSeqDenseInvertFactors_Private(Mat A)
{
  Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
  PetscBLASInt  info, n;

  PetscFunctionBegin;
  if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscBLASIntCast(A->cmap->n, &n));
  if (A->factortype == MAT_FACTOR_LU) {
    PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present");
    if (!mat->fwork) {
      mat->lfwork = n;
      PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork));
    }
    PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
    PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info));
    PetscCall(PetscFPTrapPop());
    PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0));
  } else if (A->factortype == MAT_FACTOR_CHOLESKY) {
    if (A->spd == PETSC_BOOL3_TRUE) {
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
      PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &n, mat->v, &mat->lda, &info));
      PetscCall(PetscFPTrapPop());
      PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE));
#if defined(PETSC_USE_COMPLEX)
    } else if (A->hermitian == PETSC_BOOL3_TRUE) {
      PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present");
      PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present");
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
      PetscCallBLAS("LAPACKhetri", LAPACKhetri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info));
      PetscCall(PetscFPTrapPop());
      PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_TRUE));
#endif
    } else { /* symmetric case */
      PetscCheck(mat->pivots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Pivots not present");
      PetscCheck(mat->fwork, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Fwork not present");
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
      PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info));
      PetscCall(PetscFPTrapPop());
      PetscCall(MatSeqDenseSymmetrize_Private(A, PETSC_FALSE));
    }
    PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad Inversion: zero pivot in row %" PetscBLASInt_FMT, info - 1);
    PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix must be factored to solve");

  A->ops->solve             = NULL;
  A->ops->matsolve          = NULL;
  A->ops->solvetranspose    = NULL;
  A->ops->matsolvetranspose = NULL;
  A->ops->solveadd          = NULL;
  A->ops->solvetransposeadd = NULL;
  A->factortype             = MAT_FACTOR_NONE;
  PetscCall(PetscFree(A->solvertype));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroRowsColumns_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_SeqDense      *l = (Mat_SeqDense *)A->data;
  PetscInt           m = l->lda, n = A->cmap->n, r = A->rmap->n, i, j;
  PetscScalar       *slot, *bb, *v;
  const PetscScalar *xx;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    for (i = 0; i < N; i++) {
      PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed");
      PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n);
      PetscCheck(rows[i] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Col %" PetscInt_FMT " requested to be zeroed greater than or equal number of cols %" PetscInt_FMT, rows[i], A->cmap->n);
    }
  }
  if (!N) PetscFunctionReturn(PETSC_SUCCESS);

  /* fix right-hand side if needed */
  if (x && b) {
    Vec xt;

    PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices");
    PetscCall(VecDuplicate(x, &xt));
    PetscCall(VecCopy(x, xt));
    PetscCall(VecScale(xt, -1.0));
    PetscCall(MatMultAdd(A, xt, b, b));
    PetscCall(VecDestroy(&xt));
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  PetscCall(MatDenseGetArray(A, &v));
  for (i = 0; i < N; i++) {
    slot = v + rows[i] * m;
    PetscCall(PetscArrayzero(slot, r));
  }
  for (i = 0; i < N; i++) {
    slot = v + rows[i];
    for (j = 0; j < n; j++) {
      *slot = 0.0;
      slot += m;
    }
  }
  if (diag != 0.0) {
    PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices");
    for (i = 0; i < N; i++) {
      slot  = v + (m + 1) * rows[i];
      *slot = diag;
    }
  }
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat              B = NULL;
  Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data;
  Mat_SeqDense    *b;
  PetscInt        *ai = a->i, *aj = a->j, m = A->rmap->N, n = A->cmap->N, i;
  const MatScalar *av;
  PetscBool        isseqdense;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectTypeCompare((PetscObject)*newmat, MATSEQDENSE, &isseqdense));
    PetscCheck(isseqdense, PetscObjectComm((PetscObject)*newmat), PETSC_ERR_USER, "Cannot reuse matrix of type %s", ((PetscObject)*newmat)->type_name);
  }
  if (reuse != MAT_REUSE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, m, n, m, n));
    PetscCall(MatSetType(B, MATSEQDENSE));
    PetscCall(MatSeqDenseSetPreallocation(B, NULL));
    b = (Mat_SeqDense *)B->data;
  } else {
    b = (Mat_SeqDense *)((*newmat)->data);
    for (i = 0; i < n; i++) PetscCall(PetscArrayzero(b->v + i * b->lda, m));
  }
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  for (i = 0; i < m; i++) {
    PetscInt j;
    for (j = 0; j < ai[1] - ai[0]; j++) {
      b->v[*aj * b->lda + i] = *av;
      aj++;
      av++;
    }
    ai++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    if (B) *newmat = B;
    PetscCall(MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatConvert_SeqDense_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat           B = NULL;
  Mat_SeqDense *a = (Mat_SeqDense *)A->data;
  PetscInt      i, j;
  PetscInt     *rows, *nnz;
  MatScalar    *aa = a->v, *vals;

  PetscFunctionBegin;
  PetscCall(PetscCalloc3(A->rmap->n, &rows, A->rmap->n, &nnz, A->rmap->n, &vals));
  if (reuse != MAT_REUSE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, MATSEQAIJ));
    for (j = 0; j < A->cmap->n; j++) {
      for (i = 0; i < A->rmap->n; i++)
        if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) ++nnz[i];
      aa += a->lda;
    }
    PetscCall(MatSeqAIJSetPreallocation(B, PETSC_DETERMINE, nnz));
  } else B = *newmat;
  aa = a->v;
  for (j = 0; j < A->cmap->n; j++) {
    PetscInt numRows = 0;
    for (i = 0; i < A->rmap->n; i++)
      if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) {
        rows[numRows]   = i;
        vals[numRows++] = aa[i];
      }
    PetscCall(MatSetValues(B, numRows, rows, 1, &j, vals, INSERT_VALUES));
    aa += a->lda;
  }
  PetscCall(PetscFree3(rows, nnz, vals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else if (reuse != MAT_REUSE_MATRIX) *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatAXPY_SeqDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
{
  Mat_SeqDense      *x = (Mat_SeqDense *)X->data, *y = (Mat_SeqDense *)Y->data;
  const PetscScalar *xv;
  PetscScalar       *yv;
  PetscBLASInt       N, m, ldax = 0, lday = 0, one = 1;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(X, &xv));
  PetscCall(MatDenseGetArray(Y, &yv));
  PetscCall(PetscBLASIntCast(X->rmap->n * X->cmap->n, &N));
  PetscCall(PetscBLASIntCast(X->rmap->n, &m));
  PetscCall(PetscBLASIntCast(x->lda, &ldax));
  PetscCall(PetscBLASIntCast(y->lda, &lday));
  if (ldax > m || lday > m) {
    for (PetscInt j = 0; j < X->cmap->n; j++) PetscCallBLAS("BLASaxpy", BLASaxpy_(&m, &alpha, PetscSafePointerPlusOffset(xv, j * ldax), &one, PetscSafePointerPlusOffset(yv, j * lday), &one));
  } else {
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&N, &alpha, xv, &one, yv, &one));
  }
  PetscCall(MatDenseRestoreArrayRead(X, &xv));
  PetscCall(MatDenseRestoreArray(Y, &yv));
  PetscCall(PetscLogFlops(PetscMax(2.0 * N - 1, 0)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetInfo_SeqDense(Mat A, MatInfoType flag, MatInfo *info)
{
  PetscLogDouble N = A->rmap->n * A->cmap->n;

  PetscFunctionBegin;
  info->block_size        = 1.0;
  info->nz_allocated      = N;
  info->nz_used           = N;
  info->nz_unneeded       = 0;
  info->assemblies        = A->num_ass;
  info->mallocs           = 0;
  info->memory            = 0; /* REVIEW ME */
  info->fill_ratio_given  = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatScale_SeqDense(Mat A, PetscScalar alpha)
{
  Mat_SeqDense *a = (Mat_SeqDense *)A->data;
  PetscScalar  *v;
  PetscBLASInt  one = 1, j, nz, lda = 0;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(PetscBLASIntCast(a->lda, &lda));
  if (lda > A->rmap->n) {
    PetscCall(PetscBLASIntCast(A->rmap->n, &nz));
    for (j = 0; j < A->cmap->n; j++) PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v + j * lda, &one));
  } else {
    PetscCall(PetscBLASIntCast(A->rmap->n * A->cmap->n, &nz));
    PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v, &one));
  }
  PetscCall(PetscLogFlops(A->rmap->n * A->cmap->n));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatShift_SeqDense(Mat A, PetscScalar alpha)
{
  Mat_SeqDense *a = (Mat_SeqDense *)A->data;
  PetscScalar  *v;
  PetscInt      j, k;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(A, &v));
  k = PetscMin(A->rmap->n, A->cmap->n);
  for (j = 0; j < k; j++) v[j + j * a->lda] += alpha;
  PetscCall(PetscLogFlops(k));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatIsHermitian_SeqDense(Mat A, PetscReal rtol, PetscBool *fl)
{
  Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
  PetscInt           i, j, m = A->rmap->n, N = a->lda;
  const PetscScalar *v;

  PetscFunctionBegin;
  *fl = PETSC_FALSE;
  if (A->rmap->n != A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(MatDenseGetArrayRead(A, &v));
  for (i = 0; i < m; i++) {
    for (j = i; j < m; j++) {
      if (PetscAbsScalar(v[i + j * N] - PetscConj(v[j + i * N])) > rtol) goto restore;
    }
  }
  *fl = PETSC_TRUE;
restore:
  PetscCall(MatDenseRestoreArrayRead(A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatIsSymmetric_SeqDense(Mat A, PetscReal rtol, PetscBool *fl)
{
  Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
  PetscInt           i, j, m = A->rmap->n, N = a->lda;
  const PetscScalar *v;

  PetscFunctionBegin;
  *fl = PETSC_FALSE;
  if (A->rmap->n != A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(MatDenseGetArrayRead(A, &v));
  for (i = 0; i < m; i++) {
    for (j = i; j < m; j++) {
      if (PetscAbsScalar(v[i + j * N] - v[j + i * N]) > rtol) goto restore;
    }
  }
  *fl = PETSC_TRUE;
restore:
  PetscCall(MatDenseRestoreArrayRead(A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDuplicateNoCreate_SeqDense(Mat newi, Mat A, MatDuplicateOption cpvalues)
{
  Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
  PetscInt      lda = mat->lda, j, m, nlda = lda;
  PetscBool     isdensecpu;

  PetscFunctionBegin;
  PetscCall(PetscLayoutReference(A->rmap, &newi->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &newi->cmap));
  if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { /* propagate LDA */
    PetscCall(MatDenseSetLDA(newi, lda));
  }
  PetscCall(PetscObjectTypeCompare((PetscObject)newi, MATSEQDENSE, &isdensecpu));
  if (isdensecpu) PetscCall(MatSeqDenseSetPreallocation(newi, NULL));
  if (cpvalues == MAT_COPY_VALUES) {
    const PetscScalar *av;
    PetscScalar       *v;

    PetscCall(MatDenseGetArrayRead(A, &av));
    PetscCall(MatDenseGetArrayWrite(newi, &v));
    PetscCall(MatDenseGetLDA(newi, &nlda));
    m = A->rmap->n;
    if (lda > m || nlda > m) {
      for (j = 0; j < A->cmap->n; j++) PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(v, j * nlda), PetscSafePointerPlusOffset(av, j * lda), m));
    } else {
      PetscCall(PetscArraycpy(v, av, A->rmap->n * A->cmap->n));
    }
    PetscCall(MatDenseRestoreArrayWrite(newi, &v));
    PetscCall(MatDenseRestoreArrayRead(A, &av));
    PetscCall(MatPropagateSymmetryOptions(A, newi));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDuplicate_SeqDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), newmat));
  PetscCall(MatSetSizes(*newmat, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n));
  PetscCall(MatSetType(*newmat, ((PetscObject)A)->type_name));
  PetscCall(MatDuplicateNoCreate_SeqDense(*newmat, A, cpvalues));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSolve_SeqDense_Internal_LU(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T)
{
  Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
  PetscBLASInt  info;

  PetscFunctionBegin;
  PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
  PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_(T ? "T" : "N", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
  PetscCall(PetscFPTrapPop());
  PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve %" PetscBLASInt_FMT, info);
  PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConjugate_SeqDense(Mat);

static PetscErrorCode MatSolve_SeqDense_Internal_Cholesky(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T)
{
  Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
  PetscBLASInt  info;

  PetscFunctionBegin;
  if (A->spd == PETSC_BOOL3_TRUE) {
    if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A));
    PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
    PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &m, &nrhs, mat->v, &mat->lda, x, &m, &info));
    PetscCall(PetscFPTrapPop());
    PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "POTRS Bad solve %" PetscBLASInt_FMT, info);
    if (PetscDefined(USE_COMPLEX) && T) PetscCall(MatConjugate_SeqDense(A));
#if defined(PETSC_USE_COMPLEX)
  } else if (A->hermitian == PETSC_BOOL3_TRUE) {
    if (T) PetscCall(MatConjugate_SeqDense(A));
    PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
    PetscCallBLAS("LAPACKhetrs", LAPACKhetrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
    PetscCall(PetscFPTrapPop());
    PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "HETRS Bad solve %" PetscBLASInt_FMT, info);
    if (T) PetscCall(MatConjugate_SeqDense(A));
#endif
  } else { /* symmetric case */
    PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
    PetscCallBLAS("LAPACKsytrs", LAPACKsytrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
    PetscCall(PetscFPTrapPop());
    PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "SYTRS Bad solve %" PetscBLASInt_FMT, info);
  }
  PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSolve_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k)
{
  Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
  PetscBLASInt  info;
  char          trans;

  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    trans = 'C';
  } else {
    trans = 'T';
  }
  PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
  { /* lwork depends on the number of right-hand sides */
    PetscBLASInt nlfwork, lfwork = -1;
    PetscScalar  fwork;

    PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info));
    nlfwork = (PetscBLASInt)PetscRealPart(fwork);
    if (nlfwork > mat->lfwork) {
      mat->lfwork = nlfwork;
      PetscCall(PetscFree(mat->fwork));
      PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork));
    }
  }
  PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info));
  PetscCall(PetscFPTrapPop());
  PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %" PetscBLASInt_FMT, info);
  PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
  PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &mat->rank, &nrhs, mat->v, &mat->lda, x, &ldx, &info));
  PetscCall(PetscFPTrapPop());
  PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %" PetscBLASInt_FMT, info);
  for (PetscInt j = 0; j < nrhs; j++) {
    for (PetscInt i = mat->rank; i < k; i++) x[j * ldx + i] = 0.;
  }
  PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank))));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSolveTranspose_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k)
{
  Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
  PetscBLASInt  info;

  PetscFunctionBegin;
  if (A->rmap->n == A->cmap->n && mat->rank == A->rmap->n) {
    PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
    PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &m, &nrhs, mat->v, &mat->lda, x, &ldx, &info));
    PetscCall(PetscFPTrapPop());
    PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "TRTRS - Bad triangular solve %" PetscBLASInt_FMT, info);
    if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A));
    { /* lwork depends on the number of right-hand sides */
      PetscBLASInt nlfwork, lfwork = -1;
      PetscScalar  fwork;

      PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info));
      nlfwork = (PetscBLASInt)PetscRealPart(fwork);
      if (nlfwork > mat->lfwork) {
        mat->lfwork = nlfwork;
        PetscCall(PetscFree(mat->fwork));
        PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork));
      }
    }
    PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
    PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info));
    PetscCall(PetscFPTrapPop());
    PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "ORMQR - Bad orthogonal transform %" PetscBLASInt_FMT, info);
    if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate_SeqDense(A));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "QR factored matrix cannot be used for transpose solve");
  PetscCall(PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank))));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSolve_SeqDense_SetUp(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k)
{
  Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
  PetscScalar  *y;
  PetscBLASInt  m = 0, k = 0;

  PetscFunctionBegin;
  PetscCall(PetscBLASIntCast(A->rmap->n, &m));
  PetscCall(PetscBLASIntCast(A->cmap->n, &k));
  if (k < m) {
    PetscCall(VecCopy(xx, mat->qrrhs));
    PetscCall(VecGetArray(mat->qrrhs, &y));
  } else {
    PetscCall(VecCopy(xx, yy));
    PetscCall(VecGetArray(yy, &y));
  }
  *_y = y;
  *_k = k;
  *_m = m;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSolve_SeqDense_TearDown(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k)
{
  Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
  PetscScalar  *y   = NULL;
  PetscBLASInt  m, k;

  PetscFunctionBegin;
  y   = *_y;
  *_y = NULL;
  k   = *_k;
  m   = *_m;
  if (k < m) {
    PetscScalar *yv;
    PetscCall(VecGetArray(yy, &yv));
    PetscCall(PetscArraycpy(yv, y, k));
    PetscCall(VecRestoreArray(yy, &yv));
    PetscCall(VecRestoreArray(mat->qrrhs, &y));
  } else {
    PetscCall(VecRestoreArray(yy, &y));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSolve_SeqDense_LU(Mat A, Vec xx, Vec yy)
{
  PetscScalar *y = NULL;
  PetscBLASInt m = 0, k = 0;

  PetscFunctionBegin;
  PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k));
  PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_FALSE));
  PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSolveTranspose_SeqDense_LU(Mat A, Vec xx, Vec yy)
{
  PetscScalar *y = NULL;
  PetscBLASInt m = 0, k = 0;

  PetscFunctionBegin;
  PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k));
  PetscCall(MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_TRUE));
  PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSolve_SeqDense_Cholesky(Mat A, Vec xx, Vec yy)
{
  PetscScalar *y = NULL;
  PetscBLASInt m = 0, k = 0;

  PetscFunctionBegin;
  PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k));
  PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_FALSE));
  PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSolveTranspose_SeqDense_Cholesky(Mat A, Vec xx, Vec yy)
{
  PetscScalar *y = NULL;
  PetscBLASInt m = 0, k = 0;

  PetscFunctionBegin;
  PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k));
  PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_TRUE));
  PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSolve_SeqDense_QR(Mat A, Vec xx, Vec yy)
{
  PetscScalar *y = NULL;
  PetscBLASInt m = 0, k = 0;

  PetscFunctionBegin;
  PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k));
  PetscCall(MatSolve_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k));
  PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSolveTranspose_SeqDense_QR(Mat A, Vec xx, Vec yy)
{
  PetscScalar *y = NULL;
  PetscBLASInt m = 0, k = 0;

  PetscFunctionBegin;
  PetscCall(MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k));
  PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k));
  PetscCall(MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatSolve_SeqDense_SetUp(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k)
{
  const PetscScalar *b;
  PetscScalar       *y;
  PetscInt           n, _ldb, _ldx;
  PetscBLASInt       nrhs = 0, m = 0, k = 0, ldb = 0, ldx = 0, ldy = 0;

  PetscFunctionBegin;
  *_ldy  = 0;
  *_m    = 0;
  *_nrhs = 0;
  *_k    = 0;
  *_y    = NULL;
  PetscCall(PetscBLASIntCast(A->rmap->n, &m));
  PetscCall(PetscBLASIntCast(A->cmap->n, &k));
  PetscCall(MatGetSize(B, NULL, &n));
  PetscCall(PetscBLASIntCast(n, &nrhs));
  PetscCall(MatDenseGetLDA(B, &_ldb));
  PetscCall(PetscBLASIntCast(_ldb, &ldb));
  PetscCall(MatDenseGetLDA(X, &_ldx));
  PetscCall(PetscBLASIntCast(_ldx, &ldx));
  if (ldx < m) {
    PetscCall(MatDenseGetArrayRead(B, &b));
    PetscCall(PetscMalloc1(nrhs * m, &y));
    if (ldb == m) {
      PetscCall(PetscArraycpy(y, b, ldb * nrhs));
    } else {
      for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * m], &b[j * ldb], m));
    }
    ldy = m;
    PetscCall(MatDenseRestoreArrayRead(B, &b));
  } else {
    if (ldb == ldx) {
      PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN));
      PetscCall(MatDenseGetArray(X, &y));
    } else {
      PetscCall(MatDenseGetArray(X, &y));
      PetscCall(MatDenseGetArrayRead(B, &b));
      for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&y[j * ldx], &b[j * ldb], m));
      PetscCall(MatDenseRestoreArrayRead(B, &b));
    }
    ldy = ldx;
  }
  *_y    = y;
  *_ldy  = ldy;
  *_k    = k;
  *_m    = m;
  *_nrhs = nrhs;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatSolve_SeqDense_TearDown(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k)
{
  PetscScalar *y;
  PetscInt     _ldx;
  PetscBLASInt k, ldy, nrhs, ldx = 0;

  PetscFunctionBegin;
  y    = *_y;
  *_y  = NULL;
  k    = *_k;
  ldy  = *_ldy;
  nrhs = *_nrhs;
  PetscCall(MatDenseGetLDA(X, &_ldx));
  PetscCall(PetscBLASIntCast(_ldx, &ldx));
  if (ldx != ldy) {
    PetscScalar *xv;
    PetscCall(MatDenseGetArray(X, &xv));
    for (PetscInt j = 0; j < nrhs; j++) PetscCall(PetscArraycpy(&xv[j * ldx], &y[j * ldy], k));
    PetscCall(MatDenseRestoreArray(X, &xv));
    PetscCall(PetscFree(y));
  } else {
    PetscCall(MatDenseRestoreArray(X, &y));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatSolve_SeqDense_LU(Mat A, Mat B, Mat X)
{
  PetscScalar *y;
  PetscBLASInt m, k, ldy, nrhs;

  PetscFunctionBegin;
  PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k));
  PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_FALSE));
  PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatSolveTranspose_SeqDense_LU(Mat A, Mat B, Mat X)
{
  PetscScalar *y;
  PetscBLASInt m, k, ldy, nrhs;

  PetscFunctionBegin;
  PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k));
  PetscCall(MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_TRUE));
  PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatSolve_SeqDense_Cholesky(Mat A, Mat B, Mat X)
{
  PetscScalar *y;
  PetscBLASInt m, k, ldy, nrhs;

  PetscFunctionBegin;
  PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k));
  PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_FALSE));
  PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatSolveTranspose_SeqDense_Cholesky(Mat A, Mat B, Mat X)
{
  PetscScalar *y;
  PetscBLASInt m, k, ldy, nrhs;

  PetscFunctionBegin;
  PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k));
  PetscCall(MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_TRUE));
  PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatSolve_SeqDense_QR(Mat A, Mat B, Mat X)
{
  PetscScalar *y;
  PetscBLASInt m, k, ldy, nrhs;

  PetscFunctionBegin;
  PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k));
  PetscCall(MatSolve_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k));
  PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatSolveTranspose_SeqDense_QR(Mat A, Mat B, Mat X)
{
  PetscScalar *y;
  PetscBLASInt m, k, ldy, nrhs;

  PetscFunctionBegin;
  PetscCall(MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k));
  PetscCall(MatSolveTranspose_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k));
  PetscCall(MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* COMMENT: I have chosen to hide row permutation in the pivots,
   rather than put it in the Mat->row slot.*/
PetscErrorCode MatLUFactor_SeqDense(Mat A, IS row, IS col, const MatFactorInfo *minfo)
{
  Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
  PetscBLASInt  n, m, info;

  PetscFunctionBegin;
  PetscCall(PetscBLASIntCast(A->cmap->n, &n));
  PetscCall(PetscBLASIntCast(A->rmap->n, &m));
  if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); }
  if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
  PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&m, &n, mat->v, &mat->lda, mat->pivots, &info));
  PetscCall(PetscFPTrapPop());

  PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to LU factorization %" PetscBLASInt_FMT, info);
  PetscCheck(info <= 0, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Bad LU factorization %" PetscBLASInt_FMT, info);

  A->ops->solve             = MatSolve_SeqDense_LU;
  A->ops->matsolve          = MatMatSolve_SeqDense_LU;
  A->ops->solvetranspose    = MatSolveTranspose_SeqDense_LU;
  A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_LU;
  A->factortype             = MAT_FACTOR_LU;

  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype));

  PetscCall(PetscLogFlops((2.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatLUFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy)
{
  MatFactorInfo info;

  PetscFunctionBegin;
  PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES));
  PetscUseTypeMethod(fact, lufactor, NULL, NULL, &info);
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatLUFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  fact->preallocated         = PETSC_TRUE;
  fact->assembled            = PETSC_TRUE;
  fact->ops->lufactornumeric = MatLUFactorNumeric_SeqDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Cholesky as L*L^T or L*D*L^T and the symmetric/hermitian complex variants */
PetscErrorCode MatCholeskyFactor_SeqDense(Mat A, IS perm, const MatFactorInfo *factinfo)
{
  Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
  PetscBLASInt  info, n;

  PetscFunctionBegin;
  PetscCall(PetscBLASIntCast(A->cmap->n, &n));
  if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS);
  if (A->spd == PETSC_BOOL3_TRUE) {
    PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
    PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &n, mat->v, &mat->lda, &info));
    PetscCall(PetscFPTrapPop());
#if defined(PETSC_USE_COMPLEX)
  } else if (A->hermitian == PETSC_BOOL3_TRUE) {
    if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); }
    if (!mat->fwork) {
      PetscScalar dummy;

      mat->lfwork = -1;
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
      PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info));
      PetscCall(PetscFPTrapPop());
      PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork));
      PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork));
    }
    PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
    PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info));
    PetscCall(PetscFPTrapPop());
#endif
  } else { /* symmetric case */
    if (!mat->pivots) { PetscCall(PetscMalloc1(A->rmap->n, &mat->pivots)); }
    if (!mat->fwork) {
      PetscScalar dummy;

      mat->lfwork = -1;
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
      PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info));
      PetscCall(PetscFPTrapPop());
      PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork));
      PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork));
    }
    PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
    PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info));
    PetscCall(PetscFPTrapPop());
  }
  PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Bad factorization: zero pivot in row %" PetscBLASInt_FMT, info - 1);

  A->ops->solve             = MatSolve_SeqDense_Cholesky;
  A->ops->matsolve          = MatMatSolve_SeqDense_Cholesky;
  A->ops->solvetranspose    = MatSolveTranspose_SeqDense_Cholesky;
  A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_Cholesky;
  A->factortype             = MAT_FACTOR_CHOLESKY;

  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype));

  PetscCall(PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCholeskyFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy)
{
  MatFactorInfo info;

  PetscFunctionBegin;
  info.fill = 1.0;

  PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES));
  PetscUseTypeMethod(fact, choleskyfactor, NULL, &info);
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatCholeskyFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  fact->assembled                  = PETSC_TRUE;
  fact->preallocated               = PETSC_TRUE;
  fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatQRFactor_SeqDense(Mat A, IS col, const MatFactorInfo *minfo)
{
  Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
  PetscBLASInt  n, m, info, min, max;

  PetscFunctionBegin;
  PetscCall(PetscBLASIntCast(A->cmap->n, &n));
  PetscCall(PetscBLASIntCast(A->rmap->n, &m));
  max = PetscMax(m, n);
  min = PetscMin(m, n);
  if (!mat->tau) { PetscCall(PetscMalloc1(min, &mat->tau)); }
  if (!mat->pivots) { PetscCall(PetscMalloc1(n, &mat->pivots)); }
  if (!mat->qrrhs) PetscCall(MatCreateVecs(A, NULL, &mat->qrrhs));
  if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(PETSC_SUCCESS);
  if (!mat->fwork) {
    PetscScalar dummy;

    mat->lfwork = -1;
    PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
    PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, &dummy, &mat->lfwork, &info));
    PetscCall(PetscFPTrapPop());
    PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork));
    PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork));
  }
  PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
  PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, mat->fwork, &mat->lfwork, &info));
  PetscCall(PetscFPTrapPop());
  PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Bad argument to QR factorization %" PetscBLASInt_FMT, info);
  // TODO: try to estimate rank or test for and use geqp3 for rank revealing QR.  For now just say rank is min of m and n
  mat->rank = min;

  A->ops->solve    = MatSolve_SeqDense_QR;
  A->ops->matsolve = MatMatSolve_SeqDense_QR;
  A->factortype    = MAT_FACTOR_QR;
  if (m == n) {
    A->ops->solvetranspose    = MatSolveTranspose_SeqDense_QR;
    A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_QR;
  }

  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERPETSC, &A->solvertype));

  PetscCall(PetscLogFlops(2.0 * min * min * (max - min / 3.0)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatQRFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy)
{
  MatFactorInfo info;

  PetscFunctionBegin;
  info.fill = 1.0;

  PetscCall(MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES));
  PetscUseMethod(fact, "MatQRFactor_C", (Mat, IS, const MatFactorInfo *), (fact, NULL, &info));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatQRFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  fact->assembled    = PETSC_TRUE;
  fact->preallocated = PETSC_TRUE;
  PetscCall(PetscObjectComposeFunction((PetscObject)fact, "MatQRFactorNumeric_C", MatQRFactorNumeric_SeqDense));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* uses LAPACK */
PETSC_INTERN PetscErrorCode MatGetFactor_seqdense_petsc(Mat A, MatFactorType ftype, Mat *fact)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), fact));
  PetscCall(MatSetSizes(*fact, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n));
  PetscCall(MatSetType(*fact, MATDENSE));
  (*fact)->trivialsymbolic = PETSC_TRUE;
  if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
    (*fact)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqDense;
    (*fact)->ops->ilufactorsymbolic = MatLUFactorSymbolic_SeqDense;
  } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
    (*fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqDense;
  } else if (ftype == MAT_FACTOR_QR) {
    PetscCall(PetscObjectComposeFunction((PetscObject)*fact, "MatQRFactorSymbolic_C", MatQRFactorSymbolic_SeqDense));
  }
  (*fact)->factortype = ftype;

  PetscCall(PetscFree((*fact)->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*fact)->solvertype));
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_LU]));
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ILU]));
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_CHOLESKY]));
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ICC]));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSOR_SeqDense(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal shift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
  PetscScalar       *x, *v = mat->v, zero = 0.0, xt;
  const PetscScalar *b;
  PetscInt           m = A->rmap->n, i;
  PetscBLASInt       o = 1, bm = 0;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  PetscCheck(A->offloadmask != PETSC_OFFLOAD_GPU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented");
#endif
  if (shift == -1) shift = 0.0; /* negative shift indicates do not error on zero diagonal; this code never zeros on zero diagonal */
  PetscCall(PetscBLASIntCast(m, &bm));
  if (flag & SOR_ZERO_INITIAL_GUESS) {
    /* this is a hack fix, should have another version without the second BLASdotu */
    PetscCall(VecSet(xx, zero));
  }
  PetscCall(VecGetArray(xx, &x));
  PetscCall(VecGetArrayRead(bb, &b));
  its = its * lits;
  PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
  while (its--) {
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
      for (i = 0; i < m; i++) {
        PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o));
        x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift);
      }
    }
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
      for (i = m - 1; i >= 0; i--) {
        PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o));
        x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift);
      }
    }
  }
  PetscCall(VecRestoreArrayRead(bb, &b));
  PetscCall(VecRestoreArray(xx, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultColumnRangeKernel_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end, PetscBool trans, PetscBool herm)
{
  Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
  PetscScalar       *y, _DOne = 1.0, _DZero = 0.0;
  PetscBLASInt       m, n, _One             = 1;
  const PetscScalar *v = mat->v, *x;

  PetscFunctionBegin;
  PetscCall(PetscBLASIntCast(A->rmap->n, &m));
  PetscCall(PetscBLASIntCast(c_end - c_start, &n));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArrayWrite(yy, &y));
  if (!m || !n) {
    PetscBLASInt i;
    if (trans)
      for (i = 0; i < n; i++) y[i] = 0.0;
    else
      for (i = 0; i < m; i++) y[i] = 0.0;
  } else {
    if (trans) {
      if (herm) PetscCallBLAS("BLASgemv", BLASgemv_("C", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DZero, y + c_start, &_One));
      else PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DZero, y + c_start, &_One));
    } else {
      PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x + c_start, &_One, &_DZero, y, &_One));
    }
    PetscCall(PetscLogFlops(2.0 * m * n - n));
  }
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArrayWrite(yy, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultHermitianTransposeColumnRange_SeqDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  PetscFunctionBegin;
  PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, c_start, c_end, PETSC_TRUE, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMult_SeqDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_FALSE, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultTranspose_SeqDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultHermitianTranspose_SeqDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultColumnRangeKernel_SeqDense(A, xx, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAddColumnRangeKernel_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end, PetscBool trans, PetscBool herm)
{
  Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
  const PetscScalar *v   = mat->v, *x;
  PetscScalar       *y, _DOne = 1.0;
  PetscBLASInt       m, n, _One = 1;

  PetscFunctionBegin;
  PetscCall(PetscBLASIntCast(A->rmap->n, &m));
  PetscCall(PetscBLASIntCast(c_end - c_start, &n));
  PetscCall(VecCopy(zz, yy));
  if (!m || !n) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(VecGetArray(yy, &y));
  PetscCall(VecGetArrayRead(xx, &x));
  if (trans) {
    if (herm) PetscCallBLAS("BLASgemv", BLASgemv_("C", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DOne, y + c_start, &_One));
    else PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x, &_One, &_DOne, y + c_start, &_One));
  } else {
    PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v + c_start * mat->lda, &mat->lda, x + c_start, &_One, &_DOne, y, &_One));
  }
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscCall(PetscLogFlops(2.0 * m * n));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultAddColumnRange_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end)
{
  PetscFunctionBegin;
  PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, c_start, c_end, PETSC_FALSE, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultHermitianTransposeAddColumnRange_SeqDense(Mat A, Vec xx, Vec zz, Vec yy, PetscInt c_start, PetscInt c_end)
{
  PetscFunctionBegin;
  PetscMPIInt rank;
  PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
  PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, c_start, c_end, PETSC_TRUE, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_FALSE, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultHermitianTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultAddColumnRangeKernel_SeqDense(A, xx, zz, yy, 0, A->cmap->n, PETSC_TRUE, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals)
{
  Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
  PetscInt      i;

  PetscFunctionBegin;
  if (ncols) *ncols = A->cmap->n;
  if (cols) {
    PetscCall(PetscMalloc1(A->cmap->n, cols));
    for (i = 0; i < A->cmap->n; i++) (*cols)[i] = i;
  }
  if (vals) {
    const PetscScalar *v;

    PetscCall(MatDenseGetArrayRead(A, &v));
    PetscCall(PetscMalloc1(A->cmap->n, vals));
    v += row;
    for (i = 0; i < A->cmap->n; i++) {
      (*vals)[i] = *v;
      v += mat->lda;
    }
    PetscCall(MatDenseRestoreArrayRead(A, &v));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRestoreRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals)
{
  PetscFunctionBegin;
  if (cols) PetscCall(PetscFree(*cols));
  if (vals) PetscCall(PetscFree(*vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], const PetscScalar v[], InsertMode addv)
{
  Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
  PetscScalar  *av;
  PetscInt      i, j, idx = 0;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  PetscOffloadMask oldf;
#endif

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(A, &av));
  if (!mat->roworiented) {
    if (addv == INSERT_VALUES) {
      for (j = 0; j < n; j++) {
        if (indexn[j] < 0) {
          idx += m;
          continue;
        }
        PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1);
        for (i = 0; i < m; i++) {
          if (indexm[i] < 0) {
            idx++;
            continue;
          }
          PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1);
          av[indexn[j] * mat->lda + indexm[i]] = v ? v[idx++] : (idx++, 0.0);
        }
      }
    } else {
      for (j = 0; j < n; j++) {
        if (indexn[j] < 0) {
          idx += m;
          continue;
        }
        PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1);
        for (i = 0; i < m; i++) {
          if (indexm[i] < 0) {
            idx++;
            continue;
          }
          PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1);
          av[indexn[j] * mat->lda + indexm[i]] += v ? v[idx++] : (idx++, 0.0);
        }
      }
    }
  } else {
    if (addv == INSERT_VALUES) {
      for (i = 0; i < m; i++) {
        if (indexm[i] < 0) {
          idx += n;
          continue;
        }
        PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1);
        for (j = 0; j < n; j++) {
          if (indexn[j] < 0) {
            idx++;
            continue;
          }
          PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1);
          av[indexn[j] * mat->lda + indexm[i]] = v ? v[idx++] : (idx++, 0.0);
        }
      }
    } else {
      for (i = 0; i < m; i++) {
        if (indexm[i] < 0) {
          idx += n;
          continue;
        }
        PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, indexm[i], A->rmap->n - 1);
        for (j = 0; j < n; j++) {
          if (indexn[j] < 0) {
            idx++;
            continue;
          }
          PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, indexn[j], A->cmap->n - 1);
          av[indexn[j] * mat->lda + indexm[i]] += v ? v[idx++] : (idx++, 0.0);
        }
      }
    }
  }
  /* hack to prevent unneeded copy to the GPU while returning the array */
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  oldf           = A->offloadmask;
  A->offloadmask = PETSC_OFFLOAD_GPU;
#endif
  PetscCall(MatDenseRestoreArray(A, &av));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  A->offloadmask = (oldf == PETSC_OFFLOAD_UNALLOCATED ? PETSC_OFFLOAD_UNALLOCATED : PETSC_OFFLOAD_CPU);
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], PetscScalar v[])
{
  Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
  const PetscScalar *vv;
  PetscInt           i, j;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(A, &vv));
  /* row-oriented output */
  for (i = 0; i < m; i++) {
    if (indexm[i] < 0) {
      v += n;
      continue;
    }
    PetscCheck(indexm[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested larger than number rows %" PetscInt_FMT, indexm[i], A->rmap->n);
    for (j = 0; j < n; j++) {
      if (indexn[j] < 0) {
        v++;
        continue;
      }
      PetscCheck(indexn[j] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " requested larger than number columns %" PetscInt_FMT, indexn[j], A->cmap->n);
      *v++ = vv[indexn[j] * mat->lda + indexm[i]];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatView_Dense_Binary(Mat mat, PetscViewer viewer)
{
  PetscBool          skipHeader;
  PetscViewerFormat  format;
  PetscInt           header[4], M, N, m, lda, i, j;
  PetscCount         k;
  const PetscScalar *v;
  PetscScalar       *vwork;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader));
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (skipHeader) format = PETSC_VIEWER_NATIVE;

  PetscCall(MatGetSize(mat, &M, &N));

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  header[3] = (format == PETSC_VIEWER_NATIVE) ? MATRIX_BINARY_FORMAT_DENSE : M * N;
  if (!skipHeader) PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  PetscCall(MatGetLocalSize(mat, &m, NULL));
  if (format != PETSC_VIEWER_NATIVE) {
    PetscInt nnz = m * N, *iwork;
    /* store row lengths for each row */
    PetscCall(PetscMalloc1(nnz, &iwork));
    for (i = 0; i < m; i++) iwork[i] = N;
    PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
    /* store column indices (zero start index) */
    for (k = 0, i = 0; i < m; i++)
      for (j = 0; j < N; j++, k++) iwork[k] = j;
    PetscCall(PetscViewerBinaryWriteAll(viewer, iwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
    PetscCall(PetscFree(iwork));
  }
  /* store matrix values as a dense matrix in row major order */
  PetscCall(PetscMalloc1(m * N, &vwork));
  PetscCall(MatDenseGetArrayRead(mat, &v));
  PetscCall(MatDenseGetLDA(mat, &lda));
  for (k = 0, i = 0; i < m; i++)
    for (j = 0; j < N; j++, k++) vwork[k] = v[i + (size_t)lda * j];
  PetscCall(MatDenseRestoreArrayRead(mat, &v));
  PetscCall(PetscViewerBinaryWriteAll(viewer, vwork, m * N, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  PetscCall(PetscFree(vwork));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatLoad_Dense_Binary(Mat mat, PetscViewer viewer)
{
  PetscBool    skipHeader;
  PetscInt     header[4], M, N, m, nz, lda, i, j, k;
  PetscInt     rows, cols;
  PetscScalar *v, *vwork;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscViewerBinaryGetSkipHeader(viewer, &skipHeader));

  if (!skipHeader) {
    PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
    PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
    M = header[1];
    N = header[2];
    PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
    PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
    nz = header[3];
    PetscCheck(nz == MATRIX_BINARY_FORMAT_DENSE || nz >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Unknown matrix format %" PetscInt_FMT " in file", nz);
  } else {
    PetscCall(MatGetSize(mat, &M, &N));
    PetscCheck(M >= 0 && N >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "Matrix binary file header was skipped, thus the user must specify the global sizes of input matrix");
    nz = MATRIX_BINARY_FORMAT_DENSE;
  }

  /* setup global sizes if not set */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(MatSetUp(mat));
  /* check if global sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  PetscCall(MatGetSize(mat, NULL, &N));
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(MatDenseGetArray(mat, &v));
  PetscCall(MatDenseGetLDA(mat, &lda));
  if (nz == MATRIX_BINARY_FORMAT_DENSE) { /* matrix in file is dense format */
    PetscCount nnz = (size_t)m * N;
    /* read in matrix values */
    PetscCall(PetscMalloc1(nnz, &vwork));
    PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
    /* store values in column major order */
    for (j = 0; j < N; j++)
      for (i = 0; i < m; i++) v[i + (size_t)lda * j] = vwork[(size_t)i * N + j];
    PetscCall(PetscFree(vwork));
  } else { /* matrix in file is sparse format */
    PetscInt nnz = 0, *rlens, *icols;
    /* read in row lengths */
    PetscCall(PetscMalloc1(m, &rlens));
    PetscCall(PetscViewerBinaryReadAll(viewer, rlens, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
    for (i = 0; i < m; i++) nnz += rlens[i];
    /* read in column indices and values */
    PetscCall(PetscMalloc2(nnz, &icols, nnz, &vwork));
    PetscCall(PetscViewerBinaryReadAll(viewer, icols, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
    PetscCall(PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
    /* store values in column major order */
    for (k = 0, i = 0; i < m; i++)
      for (j = 0; j < rlens[i]; j++, k++) v[i + lda * icols[k]] = vwork[k];
    PetscCall(PetscFree(rlens));
    PetscCall(PetscFree2(icols, vwork));
  }
  PetscCall(MatDenseRestoreArray(mat, &v));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatLoad_SeqDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatView_SeqDense_ASCII(Mat A, PetscViewer viewer)
{
  Mat_SeqDense     *a = (Mat_SeqDense *)A->data;
  PetscInt          i, j;
  const char       *name;
  PetscScalar      *v, *av;
  PetscViewerFormat format;
#if defined(PETSC_USE_COMPLEX)
  PetscBool allreal = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&av));
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
    PetscFunctionReturn(PETSC_SUCCESS); /* do nothing for now */
  } else if (format == PETSC_VIEWER_ASCII_COMMON) {
    PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
    for (i = 0; i < A->rmap->n; i++) {
      v = av + i;
      PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i));
      for (j = 0; j < A->cmap->n; j++) {
#if defined(PETSC_USE_COMPLEX)
        if (PetscRealPart(*v) != 0.0 && PetscImaginaryPart(*v) != 0.0) {
          PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", j, (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v)));
        } else if (PetscRealPart(*v)) {
          PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)PetscRealPart(*v)));
        }
#else
        if (*v) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)*v));
#endif
        v += a->lda;
      }
      PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
    }
    PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
  } else {
    PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
#if defined(PETSC_USE_COMPLEX)
    /* determine if matrix has all real values */
    for (j = 0; j < A->cmap->n; j++) {
      v = av + j * a->lda;
      for (i = 0; i < A->rmap->n; i++) {
        if (PetscImaginaryPart(v[i])) {
          allreal = PETSC_FALSE;
          break;
        }
      }
    }
#endif
    if (format == PETSC_VIEWER_ASCII_MATLAB) {
      PetscCall(PetscObjectGetName((PetscObject)A, &name));
      PetscCall(PetscViewerASCIIPrintf(viewer, "%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n", A->rmap->n, A->cmap->n));
      PetscCall(PetscViewerASCIIPrintf(viewer, "%s = zeros(%" PetscInt_FMT ",%" PetscInt_FMT ");\n", name, A->rmap->n, A->cmap->n));
      PetscCall(PetscViewerASCIIPrintf(viewer, "%s = [\n", name));
    }

    for (i = 0; i < A->rmap->n; i++) {
      v = av + i;
      for (j = 0; j < A->cmap->n; j++) {
#if defined(PETSC_USE_COMPLEX)
        if (allreal) {
          PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(*v)));
        } else {
          PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei ", (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v)));
        }
#else
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)*v));
#endif
        v += a->lda;
      }
      PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
    }
    if (format == PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, "];\n"));
    PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
  }
  PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&av));
  PetscCall(PetscViewerFlush(viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
static PetscErrorCode MatView_SeqDense_Draw_Zoom(PetscDraw draw, void *Aa)
{
  Mat                A = (Mat)Aa;
  PetscInt           m = A->rmap->n, n = A->cmap->n, i, j;
  int                color = PETSC_DRAW_WHITE;
  const PetscScalar *v;
  PetscViewer        viewer;
  PetscReal          xl, yl, xr, yr, x_l, x_r, y_l, y_r;
  PetscViewerFormat  format;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
  PetscCall(PetscViewerGetFormat(viewer, &format));
  PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));

  /* Loop over matrix elements drawing boxes */
  PetscCall(MatDenseGetArrayRead(A, &v));
  if (format != PETSC_VIEWER_DRAW_CONTOUR) {
    PetscDrawCollectiveBegin(draw);
    /* Blue for negative and Red for positive */
    for (j = 0; j < n; j++) {
      x_l = j;
      x_r = x_l + 1.0;
      for (i = 0; i < m; i++) {
        y_l = m - i - 1.0;
        y_r = y_l + 1.0;
        if (PetscRealPart(v[j * m + i]) > 0.) color = PETSC_DRAW_RED;
        else if (PetscRealPart(v[j * m + i]) < 0.) color = PETSC_DRAW_BLUE;
        else continue;
        PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color));
      }
    }
    PetscDrawCollectiveEnd(draw);
  } else {
    /* use contour shading to indicate magnitude of values */
    /* first determine max of all nonzero values */
    PetscReal minv = 0.0, maxv = 0.0;
    PetscDraw popup;

    for (i = 0; i < m * n; i++) {
      if (PetscAbsScalar(v[i]) > maxv) maxv = PetscAbsScalar(v[i]);
    }
    if (minv >= maxv) maxv = minv + PETSC_SMALL;
    PetscCall(PetscDrawGetPopup(draw, &popup));
    PetscCall(PetscDrawScalePopup(popup, minv, maxv));

    PetscDrawCollectiveBegin(draw);
    for (j = 0; j < n; j++) {
      x_l = j;
      x_r = x_l + 1.0;
      for (i = 0; i < m; i++) {
        y_l   = m - i - 1.0;
        y_r   = y_l + 1.0;
        color = PetscDrawRealToColor(PetscAbsScalar(v[j * m + i]), minv, maxv);
        PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color));
      }
    }
    PetscDrawCollectiveEnd(draw);
  }
  PetscCall(MatDenseRestoreArrayRead(A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatView_SeqDense_Draw(Mat A, PetscViewer viewer)
{
  PetscDraw draw;
  PetscBool isnull;
  PetscReal xr, yr, xl, yl, h, w;

  PetscFunctionBegin;
  PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
  PetscCall(PetscDrawIsNull(draw, &isnull));
  if (isnull) PetscFunctionReturn(PETSC_SUCCESS);

  xr = A->cmap->n;
  yr = A->rmap->n;
  h  = yr / 10.0;
  w  = xr / 10.0;
  xr += w;
  yr += h;
  xl = -w;
  yl = -h;
  PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
  PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
  PetscCall(PetscDrawZoom(draw, MatView_SeqDense_Draw_Zoom, A));
  PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
  PetscCall(PetscDrawSave(draw));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatView_SeqDense(Mat A, PetscViewer viewer)
{
  PetscBool iascii, isbinary, isdraw;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) PetscCall(MatView_SeqDense_ASCII(A, viewer));
  else if (isbinary) PetscCall(MatView_Dense_Binary(A, viewer));
  else if (isdraw) PetscCall(MatView_SeqDense_Draw(A, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDensePlaceArray_SeqDense(Mat A, const PetscScalar *array)
{
  Mat_SeqDense *a = (Mat_SeqDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCheck(!a->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseResetArray() first");
  a->unplacedarray       = a->v;
  a->unplaced_user_alloc = a->user_alloc;
  a->v                   = (PetscScalar *)array;
  a->user_alloc          = PETSC_TRUE;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseResetArray_SeqDense(Mat A)
{
  Mat_SeqDense *a = (Mat_SeqDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  a->v             = a->unplacedarray;
  a->user_alloc    = a->unplaced_user_alloc;
  a->unplacedarray = NULL;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseReplaceArray_SeqDense(Mat A, const PetscScalar *array)
{
  Mat_SeqDense *a = (Mat_SeqDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->user_alloc) PetscCall(PetscFree(a->v));
  a->v          = (PetscScalar *)array;
  a->user_alloc = PETSC_FALSE;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDestroy_SeqDense(Mat mat)
{
  Mat_SeqDense *l = (Mat_SeqDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows %" PetscInt_FMT " Cols %" PetscInt_FMT, mat->rmap->n, mat->cmap->n));
  PetscCall(VecDestroy(&l->qrrhs));
  PetscCall(PetscFree(l->tau));
  PetscCall(PetscFree(l->pivots));
  PetscCall(PetscFree(l->fwork));
  if (!l->user_alloc) PetscCall(PetscFree(l->v));
  if (!l->unplaced_user_alloc) PetscCall(PetscFree(l->unplacedarray));
  PetscCheck(!l->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(VecDestroy(&l->cvec));
  PetscCall(MatDestroy(&l->cmat));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorSymbolic_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorNumeric_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensecuda_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensehip_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSeqDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqaij_seqdense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqbaij_seqdense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqsbaij_seqdense_C", NULL));

  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatTranspose_SeqDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
  PetscInt      k, j, m = A->rmap->n, M = mat->lda, n = A->cmap->n;
  PetscScalar  *v, tmp;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INPLACE_MATRIX) {
    if (m == n) { /* in place transpose */
      PetscCall(MatDenseGetArray(A, &v));
      for (j = 0; j < m; j++) {
        for (k = 0; k < j; k++) {
          tmp          = v[j + k * M];
          v[j + k * M] = v[k + j * M];
          v[k + j * M] = tmp;
        }
      }
      PetscCall(MatDenseRestoreArray(A, &v));
    } else { /* reuse memory, temporary allocates new memory */
      PetscScalar *v2;
      PetscLayout  tmplayout;

      PetscCall(PetscMalloc1((size_t)m * n, &v2));
      PetscCall(MatDenseGetArray(A, &v));
      for (j = 0; j < n; j++) {
        for (k = 0; k < m; k++) v2[j + (size_t)k * n] = v[k + (size_t)j * M];
      }
      PetscCall(PetscArraycpy(v, v2, (size_t)m * n));
      PetscCall(PetscFree(v2));
      PetscCall(MatDenseRestoreArray(A, &v));
      /* cleanup size dependent quantities */
      PetscCall(VecDestroy(&mat->cvec));
      PetscCall(MatDestroy(&mat->cmat));
      PetscCall(PetscFree(mat->pivots));
      PetscCall(PetscFree(mat->fwork));
      /* swap row/col layouts */
      PetscCall(PetscBLASIntCast(n, &mat->lda));
      tmplayout = A->rmap;
      A->rmap   = A->cmap;
      A->cmap   = tmplayout;
    }
  } else { /* out-of-place transpose */
    Mat           tmat;
    Mat_SeqDense *tmatd;
    PetscScalar  *v2;
    PetscInt      M2;

    if (reuse == MAT_INITIAL_MATRIX) {
      PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &tmat));
      PetscCall(MatSetSizes(tmat, A->cmap->n, A->rmap->n, A->cmap->n, A->rmap->n));
      PetscCall(MatSetType(tmat, ((PetscObject)A)->type_name));
      PetscCall(MatSeqDenseSetPreallocation(tmat, NULL));
    } else tmat = *matout;

    PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&v));
    PetscCall(MatDenseGetArray(tmat, &v2));
    tmatd = (Mat_SeqDense *)tmat->data;
    M2    = tmatd->lda;
    for (j = 0; j < n; j++) {
      for (k = 0; k < m; k++) v2[j + k * M2] = v[k + j * M];
    }
    PetscCall(MatDenseRestoreArray(tmat, &v2));
    PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&v));
    PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY));
    *matout = tmat;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatEqual_SeqDense(Mat A1, Mat A2, PetscBool *flg)
{
  Mat_SeqDense      *mat1 = (Mat_SeqDense *)A1->data;
  Mat_SeqDense      *mat2 = (Mat_SeqDense *)A2->data;
  PetscInt           i;
  const PetscScalar *v1, *v2;

  PetscFunctionBegin;
  if (A1->rmap->n != A2->rmap->n) {
    *flg = PETSC_FALSE;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (A1->cmap->n != A2->cmap->n) {
    *flg = PETSC_FALSE;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(MatDenseGetArrayRead(A1, &v1));
  PetscCall(MatDenseGetArrayRead(A2, &v2));
  for (i = 0; i < A1->cmap->n; i++) {
    PetscCall(PetscArraycmp(v1, v2, A1->rmap->n, flg));
    if (*flg == PETSC_FALSE) PetscFunctionReturn(PETSC_SUCCESS);
    v1 += mat1->lda;
    v2 += mat2->lda;
  }
  PetscCall(MatDenseRestoreArrayRead(A1, &v1));
  PetscCall(MatDenseRestoreArrayRead(A2, &v2));
  *flg = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetDiagonal_SeqDense(Mat A, Vec v)
{
  Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
  PetscInt           i, n, len;
  PetscScalar       *x;
  const PetscScalar *vv;

  PetscFunctionBegin;
  PetscCall(VecGetSize(v, &n));
  PetscCall(VecGetArray(v, &x));
  len = PetscMin(A->rmap->n, A->cmap->n);
  PetscCall(MatDenseGetArrayRead(A, &vv));
  PetscCheck(n == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
  for (i = 0; i < len; i++) x[i] = vv[i * mat->lda + i];
  PetscCall(MatDenseRestoreArrayRead(A, &vv));
  PetscCall(VecRestoreArray(v, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDiagonalScale_SeqDense(Mat A, Vec ll, Vec rr)
{
  Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
  const PetscScalar *l, *r;
  PetscScalar        x, *v, *vv;
  PetscInt           i, j, m = A->rmap->n, n = A->cmap->n;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(A, &vv));
  if (ll) {
    PetscCall(VecGetSize(ll, &m));
    PetscCall(VecGetArrayRead(ll, &l));
    PetscCheck(m == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vec wrong size");
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += mat->lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    PetscCall(VecGetSize(rr, &n));
    PetscCall(VecGetArrayRead(rr, &r));
    PetscCheck(n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec wrong size");
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * mat->lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArrayRead(rr, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatNorm_SeqDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
  PetscScalar  *v, *vv;
  PetscReal     sum = 0.0;
  PetscInt      lda, m = A->rmap->n, i, j;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(A, (const PetscScalar **)&vv));
  PetscCall(MatDenseGetLDA(A, &lda));
  v = vv;
  if (type == NORM_FROBENIUS) {
    if (lda > m) {
      for (j = 0; j < A->cmap->n; j++) {
        v = vv + j * lda;
        for (i = 0; i < m; i++) {
          sum += PetscRealPart(PetscConj(*v) * (*v));
          v++;
        }
      }
    } else {
#if defined(PETSC_USE_REAL___FP16)
      PetscBLASInt one = 1, cnt = A->cmap->n * A->rmap->n;
      PetscCallBLAS("BLASnrm2", *nrm = BLASnrm2_(&cnt, v, &one));
    }
#else
      for (i = 0; i < A->cmap->n * A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
    }
    *nrm = PetscSqrtReal(sum);
#endif
    PetscCall(PetscLogFlops(2.0 * A->cmap->n * A->rmap->n));
  } else if (type == NORM_1) {
    *nrm = 0.0;
    for (j = 0; j < A->cmap->n; j++) {
      v   = vv + j * mat->lda;
      sum = 0.0;
      for (i = 0; i < A->rmap->n; i++) {
        sum += PetscAbsScalar(*v);
        v++;
      }
      if (sum > *nrm) *nrm = sum;
    }
    PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n));
  } else if (type == NORM_INFINITY) {
    *nrm = 0.0;
    for (j = 0; j < A->rmap->n; j++) {
      v   = vv + j;
      sum = 0.0;
      for (i = 0; i < A->cmap->n; i++) {
        sum += PetscAbsScalar(*v);
        v += mat->lda;
      }
      if (sum > *nrm) *nrm = sum;
    }
    PetscCall(PetscLogFlops(1.0 * A->cmap->n * A->rmap->n));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No two norm");
  PetscCall(MatDenseRestoreArrayRead(A, (const PetscScalar **)&vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetOption_SeqDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_SeqDense *aij = (Mat_SeqDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_ROW_ORIENTED:
    aij->roworiented = flg;
    break;
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_IGNORE_OFF_PROC_ENTRIES:
  case MAT_USE_HASH_TABLE:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatZeroEntries_SeqDense(Mat A)
{
  Mat_SeqDense *l   = (Mat_SeqDense *)A->data;
  PetscInt      lda = l->lda, m = A->rmap->n, n = A->cmap->n, j;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayWrite(A, &v));
  if (lda > m) {
    for (j = 0; j < n; j++) PetscCall(PetscArrayzero(v + j * lda, m));
  } else {
    PetscCall(PetscArrayzero(v, PetscInt64Mult(m, n)));
  }
  PetscCall(MatDenseRestoreArrayWrite(A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroRows_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_SeqDense      *l = (Mat_SeqDense *)A->data;
  PetscInt           m = l->lda, n = A->cmap->n, i, j;
  PetscScalar       *slot, *bb, *v;
  const PetscScalar *xx;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    for (i = 0; i < N; i++) {
      PetscCheck(rows[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row requested to be zeroed");
      PetscCheck(rows[i] < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " requested to be zeroed greater than or equal number of rows %" PetscInt_FMT, rows[i], A->rmap->n);
    }
  }
  if (!N) PetscFunctionReturn(PETSC_SUCCESS);

  /* fix right-hand side if needed */
  if (x && b) {
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  PetscCall(MatDenseGetArray(A, &v));
  for (i = 0; i < N; i++) {
    slot = v + rows[i];
    for (j = 0; j < n; j++) {
      *slot = 0.0;
      slot += m;
    }
  }
  if (diag != 0.0) {
    PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only coded for square matrices");
    for (i = 0; i < N; i++) {
      slot  = v + (m + 1) * rows[i];
      *slot = diag;
    }
  }
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseGetLDA_SeqDense(Mat A, PetscInt *lda)
{
  Mat_SeqDense *mat = (Mat_SeqDense *)A->data;

  PetscFunctionBegin;
  *lda = mat->lda;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDenseGetArray_SeqDense(Mat A, PetscScalar **array)
{
  Mat_SeqDense *mat = (Mat_SeqDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!mat->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  *array = mat->v;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDenseRestoreArray_SeqDense(Mat A, PetscScalar **array)
{
  PetscFunctionBegin;
  if (array) *array = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseGetLDA - gets the leading dimension of the array returned from `MatDenseGetArray()`

  Not Collective

  Input Parameter:
. A - a `MATDENSE` or `MATDENSECUDA` matrix

  Output Parameter:
. lda - the leading dimension

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseSetLDA()`
@*/
PetscErrorCode MatDenseGetLDA(Mat A, PetscInt *lda)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(lda, 2);
  MatCheckPreallocated(A, 1);
  PetscUseMethod(A, "MatDenseGetLDA_C", (Mat, PetscInt *), (A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseSetLDA - Sets the leading dimension of the array used by the `MATDENSE` matrix

  Collective if the matrix layouts have not yet been setup

  Input Parameters:
+ A   - a `MATDENSE` or `MATDENSECUDA` matrix
- lda - the leading dimension

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetLDA()`
@*/
PetscErrorCode MatDenseSetLDA(Mat A, PetscInt lda)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscTryMethod(A, "MatDenseSetLDA_C", (Mat, PetscInt), (A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseGetArray - gives read-write access to the array where the data for a `MATDENSE` matrix is stored

  Logically Collective

  Input Parameter:
. A - a dense matrix

  Output Parameter:
. array - pointer to the data

  Level: intermediate

  Fortran Notes:
  `MatDenseGetArray()` Fortran binding is deprecated (since PETSc 3.19), use `MatDenseGetArrayF90()`

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
@*/
PetscErrorCode MatDenseGetArray(Mat A, PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(array, 2);
  PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseRestoreArray - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArray()`

  Logically Collective

  Input Parameters:
+ A     - a dense matrix
- array - pointer to the data (may be `NULL`)

  Level: intermediate

  Fortran Notes:
  `MatDenseRestoreArray()` Fortran binding is deprecated (since PETSc 3.19), use `MatDenseRestoreArrayF90()`

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
@*/
PetscErrorCode MatDenseRestoreArray(Mat A, PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  if (array) PetscAssertPointer(array, 2);
  PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseGetArrayRead - gives read-only access to the array where the data for a `MATDENSE` matrix is stored

  Not Collective

  Input Parameter:
. A - a dense matrix

  Output Parameter:
. array - pointer to the data

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
@*/
PetscErrorCode MatDenseGetArrayRead(Mat A, const PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(array, 2);
  PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseRestoreArrayRead - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayRead()`

  Not Collective

  Input Parameters:
+ A     - a dense matrix
- array - pointer to the data (may be `NULL`)

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
@*/
PetscErrorCode MatDenseRestoreArrayRead(Mat A, const PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  if (array) PetscAssertPointer(array, 2);
  PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseGetArrayWrite - gives write-only access to the array where the data for a `MATDENSE` matrix is stored

  Not Collective

  Input Parameter:
. A - a dense matrix

  Output Parameter:
. array - pointer to the data

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`
@*/
PetscErrorCode MatDenseGetArrayWrite(Mat A, PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(array, 2);
  PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseRestoreArrayWrite - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayWrite()`

  Not Collective

  Input Parameters:
+ A     - a dense matrix
- array - pointer to the data (may be `NULL`)

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`
@*/
PetscErrorCode MatDenseRestoreArrayWrite(Mat A, PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  if (array) PetscAssertPointer(array, 2);
  PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseGetArrayAndMemType - gives read-write access to the array where the data for a `MATDENSE` matrix is stored

  Logically Collective

  Input Parameter:
. A - a dense matrix

  Output Parameters:
+ array - pointer to the data
- mtype - memory type of the returned pointer

  Level: intermediate

  Note:
  If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc.,
  an array on device is always returned and is guaranteed to contain the matrix's latest data.

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayAndMemType()`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArrayWriteAndMemType()`, `MatDenseGetArrayRead()`,
   `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()`
@*/
PetscErrorCode MatDenseGetArrayAndMemType(Mat A, PetscScalar *array[], PetscMemType *mtype)
{
  PetscBool isMPI;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(array, 2);
  PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI));
  if (isMPI) {
    /* Dispatch here so that the code can be reused for all subclasses of MATDENSE */
    PetscCall(MatDenseGetArrayAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype));
  } else {
    PetscErrorCode (*fptr)(Mat, PetscScalar **, PetscMemType *);

    PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayAndMemType_C", &fptr));
    if (fptr) {
      PetscCall((*fptr)(A, array, mtype));
    } else {
      PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array));
      if (mtype) *mtype = PETSC_MEMTYPE_HOST;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseRestoreArrayAndMemType - returns access to the array that is obtained by `MatDenseGetArrayAndMemType()`

  Logically Collective

  Input Parameters:
+ A     - a dense matrix
- array - pointer to the data

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
@*/
PetscErrorCode MatDenseRestoreArrayAndMemType(Mat A, PetscScalar *array[])
{
  PetscBool isMPI;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(array, 2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI));
  if (isMPI) {
    PetscCall(MatDenseRestoreArrayAndMemType(((Mat_MPIDense *)A->data)->A, array));
  } else {
    PetscErrorCode (*fptr)(Mat, PetscScalar **);

    PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayAndMemType_C", &fptr));
    if (fptr) {
      PetscCall((*fptr)(A, array));
    } else {
      PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array));
    }
    *array = NULL;
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseGetArrayReadAndMemType - gives read-only access to the array where the data for a `MATDENSE` matrix is stored

  Logically Collective

  Input Parameter:
. A - a dense matrix

  Output Parameters:
+ array - pointer to the data
- mtype - memory type of the returned pointer

  Level: intermediate

  Note:
  If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc.,
  an array on device is always returned and is guaranteed to contain the matrix's latest data.

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayReadAndMemType()`, `MatDenseGetArrayWriteAndMemType()`,
   `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()`
@*/
PetscErrorCode MatDenseGetArrayReadAndMemType(Mat A, const PetscScalar *array[], PetscMemType *mtype)
{
  PetscBool isMPI;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(array, 2);
  PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI));
  if (isMPI) { /* Dispatch here so that the code can be reused for all subclasses of MATDENSE */
    PetscCall(MatDenseGetArrayReadAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype));
  } else {
    PetscErrorCode (*fptr)(Mat, const PetscScalar **, PetscMemType *);

    PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayReadAndMemType_C", &fptr));
    if (fptr) {
      PetscCall((*fptr)(A, array, mtype));
    } else {
      PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array));
      if (mtype) *mtype = PETSC_MEMTYPE_HOST;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseRestoreArrayReadAndMemType - returns access to the array that is obtained by `MatDenseGetArrayReadAndMemType()`

  Logically Collective

  Input Parameters:
+ A     - a dense matrix
- array - pointer to the data

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
@*/
PetscErrorCode MatDenseRestoreArrayReadAndMemType(Mat A, const PetscScalar *array[])
{
  PetscBool isMPI;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(array, 2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI));
  if (isMPI) {
    PetscCall(MatDenseRestoreArrayReadAndMemType(((Mat_MPIDense *)A->data)->A, array));
  } else {
    PetscErrorCode (*fptr)(Mat, const PetscScalar **);

    PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayReadAndMemType_C", &fptr));
    if (fptr) {
      PetscCall((*fptr)(A, array));
    } else {
      PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, PetscScalar **), (A, (PetscScalar **)array));
    }
    *array = NULL;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseGetArrayWriteAndMemType - gives write-only access to the array where the data for a `MATDENSE` matrix is stored

  Logically Collective

  Input Parameter:
. A - a dense matrix

  Output Parameters:
+ array - pointer to the data
- mtype - memory type of the returned pointer

  Level: intermediate

  Note:
  If the matrix is of a device type such as `MATDENSECUDA`, `MATDENSEHIP`, etc.,
  an array on device is always returned and is guaranteed to contain the matrix's latest data.

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreArrayWriteAndMemType()`, `MatDenseGetArrayReadAndMemType()`, `MatDenseGetArrayRead()`,
  `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`, `MatSeqAIJGetCSRAndMemType()`
@*/
PetscErrorCode MatDenseGetArrayWriteAndMemType(Mat A, PetscScalar *array[], PetscMemType *mtype)
{
  PetscBool isMPI;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(array, 2);
  PetscCall(MatBindToCPU(A, PETSC_FALSE)); /* We want device matrices to always return device arrays, so we unbind the matrix if it is bound to CPU */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI));
  if (isMPI) {
    PetscCall(MatDenseGetArrayWriteAndMemType(((Mat_MPIDense *)A->data)->A, array, mtype));
  } else {
    PetscErrorCode (*fptr)(Mat, PetscScalar **, PetscMemType *);

    PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseGetArrayWriteAndMemType_C", &fptr));
    if (fptr) {
      PetscCall((*fptr)(A, array, mtype));
    } else {
      PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array));
      if (mtype) *mtype = PETSC_MEMTYPE_HOST;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseRestoreArrayWriteAndMemType - returns access to the array that is obtained by `MatDenseGetArrayReadAndMemType()`

  Logically Collective

  Input Parameters:
+ A     - a dense matrix
- array - pointer to the data

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArrayWriteAndMemType()`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
@*/
PetscErrorCode MatDenseRestoreArrayWriteAndMemType(Mat A, PetscScalar *array[])
{
  PetscBool isMPI;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(array, 2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &isMPI));
  if (isMPI) {
    PetscCall(MatDenseRestoreArrayWriteAndMemType(((Mat_MPIDense *)A->data)->A, array));
  } else {
    PetscErrorCode (*fptr)(Mat, PetscScalar **);

    PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatDenseRestoreArrayWriteAndMemType_C", &fptr));
    if (fptr) {
      PetscCall((*fptr)(A, array));
    } else {
      PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array));
    }
    *array = NULL;
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCreateSubMatrix_SeqDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_SeqDense   *mat = (Mat_SeqDense *)A->data;
  PetscInt        i, j, nrows, ncols, ldb;
  const PetscInt *irow, *icol;
  PetscScalar    *av, *bv, *v = mat->v;
  Mat             newmat;

  PetscFunctionBegin;
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* Check submatrixcall */
  if (scall == MAT_REUSE_MATRIX) {
    PetscInt n_cols, n_rows;
    PetscCall(MatGetSize(*B, &n_rows, &n_cols));
    if (n_rows != nrows || n_cols != ncols) {
      /* resize the result matrix to match number of requested rows/columns */
      PetscCall(MatSetSizes(*B, nrows, ncols, nrows, ncols));
    }
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, nrows, ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatSeqDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy,column at a time */
  PetscCall(MatDenseGetArray(newmat, &bv));
  PetscCall(MatDenseGetLDA(newmat, &ldb));
  for (i = 0; i < ncols; i++) {
    av = v + mat->lda * icol[i];
    for (j = 0; j < nrows; j++) bv[j] = av[irow[j]];
    bv += ldb;
  }
  PetscCall(MatDenseRestoreArray(newmat, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol, &icol));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCreateSubMatrices_SeqDense(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
{
  PetscInt i;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) PetscCall(PetscCalloc1(n, B));

  for (i = 0; i < n; i++) PetscCall(MatCreateSubMatrix_SeqDense(A, irow[i], icol[i], scall, &(*B)[i]));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyBegin_SeqDense(Mat mat, MatAssemblyType mode)
{
  PetscFunctionBegin;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyEnd_SeqDense(Mat mat, MatAssemblyType mode)
{
  PetscFunctionBegin;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatCopy_SeqDense(Mat A, Mat B, MatStructure str)
{
  Mat_SeqDense      *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data;
  const PetscScalar *va;
  PetscScalar       *vb;
  PetscInt           lda1 = a->lda, lda2 = b->lda, m = A->rmap->n, n = A->cmap->n, j;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if (A->ops->copy != B->ops->copy) {
    PetscCall(MatCopy_Basic(A, B, str));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCheck(m == B->rmap->n && n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "size(B) != size(A)");
  PetscCall(MatDenseGetArrayRead(A, &va));
  PetscCall(MatDenseGetArray(B, &vb));
  if (lda1 > m || lda2 > m) {
    for (j = 0; j < n; j++) PetscCall(PetscArraycpy(vb + j * lda2, va + j * lda1, m));
  } else {
    PetscCall(PetscArraycpy(vb, va, A->rmap->n * A->cmap->n));
  }
  PetscCall(MatDenseRestoreArray(B, &vb));
  PetscCall(MatDenseRestoreArrayRead(A, &va));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetUp_SeqDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatSeqDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConjugate_SeqDense(Mat A)
{
  Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
  PetscInt      i, j;
  PetscInt      min = PetscMin(A->rmap->n, A->cmap->n);
  PetscScalar  *aa;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(A, &aa));
  for (j = 0; j < A->cmap->n; j++) {
    for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscConj(aa[i + j * mat->lda]);
  }
  PetscCall(MatDenseRestoreArray(A, &aa));
  if (mat->tau)
    for (i = 0; i < min; i++) mat->tau[i] = PetscConj(mat->tau[i]);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRealPart_SeqDense(Mat A)
{
  Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
  PetscInt      i, j;
  PetscScalar  *aa;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(A, &aa));
  for (j = 0; j < A->cmap->n; j++) {
    for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscRealPart(aa[i + j * mat->lda]);
  }
  PetscCall(MatDenseRestoreArray(A, &aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatImaginaryPart_SeqDense(Mat A)
{
  Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
  PetscInt      i, j;
  PetscScalar  *aa;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(A, &aa));
  for (j = 0; j < A->cmap->n; j++) {
    for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscImaginaryPart(aa[i + j * mat->lda]);
  }
  PetscCall(MatDenseRestoreArray(A, &aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscInt  m = A->rmap->n, n = B->cmap->n;
  PetscBool cisdense = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(MatSetSizes(C, m, n, m, n));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, ""));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, ""));
#endif
  if (!cisdense) {
    PetscBool flg;

    PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg));
    PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE));
  }
  PetscCall(MatSetUp(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C)
{
  Mat_SeqDense      *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data, *c = (Mat_SeqDense *)C->data;
  PetscBLASInt       m, n, k;
  const PetscScalar *av, *bv;
  PetscScalar       *cv;
  PetscScalar        _DOne = 1.0, _DZero = 0.0;

  PetscFunctionBegin;
  PetscCall(PetscBLASIntCast(C->rmap->n, &m));
  PetscCall(PetscBLASIntCast(C->cmap->n, &n));
  PetscCall(PetscBLASIntCast(A->cmap->n, &k));
  if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(MatDenseGetArrayRead(A, &av));
  PetscCall(MatDenseGetArrayRead(B, &bv));
  PetscCall(MatDenseGetArrayWrite(C, &cv));
  PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda));
  PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1)));
  PetscCall(MatDenseRestoreArrayRead(A, &av));
  PetscCall(MatDenseRestoreArrayRead(B, &bv));
  PetscCall(MatDenseRestoreArrayWrite(C, &cv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMatTransposeMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscInt  m = A->rmap->n, n = B->rmap->n;
  PetscBool cisdense = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(MatSetSizes(C, m, n, m, n));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, ""));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, ""));
#endif
  if (!cisdense) {
    PetscBool flg;

    PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg));
    PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE));
  }
  PetscCall(MatSetUp(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMatTransposeMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C)
{
  Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
  Mat_SeqDense      *b = (Mat_SeqDense *)B->data;
  Mat_SeqDense      *c = (Mat_SeqDense *)C->data;
  const PetscScalar *av, *bv;
  PetscScalar       *cv;
  PetscBLASInt       m, n, k;
  PetscScalar        _DOne = 1.0, _DZero = 0.0;

  PetscFunctionBegin;
  PetscCall(PetscBLASIntCast(C->rmap->n, &m));
  PetscCall(PetscBLASIntCast(C->cmap->n, &n));
  PetscCall(PetscBLASIntCast(A->cmap->n, &k));
  if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(MatDenseGetArrayRead(A, &av));
  PetscCall(MatDenseGetArrayRead(B, &bv));
  PetscCall(MatDenseGetArrayWrite(C, &cv));
  PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda));
  PetscCall(MatDenseRestoreArrayRead(A, &av));
  PetscCall(MatDenseRestoreArrayRead(B, &bv));
  PetscCall(MatDenseRestoreArrayWrite(C, &cv));
  PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatTransposeMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscInt  m = A->cmap->n, n = B->cmap->n;
  PetscBool cisdense = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(MatSetSizes(C, m, n, m, n));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, ""));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, ""));
#endif
  if (!cisdense) {
    PetscBool flg;

    PetscCall(PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg));
    PetscCall(MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE));
  }
  PetscCall(MatSetUp(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatTransposeMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C)
{
  Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
  Mat_SeqDense      *b = (Mat_SeqDense *)B->data;
  Mat_SeqDense      *c = (Mat_SeqDense *)C->data;
  const PetscScalar *av, *bv;
  PetscScalar       *cv;
  PetscBLASInt       m, n, k;
  PetscScalar        _DOne = 1.0, _DZero = 0.0;

  PetscFunctionBegin;
  PetscCall(PetscBLASIntCast(C->rmap->n, &m));
  PetscCall(PetscBLASIntCast(C->cmap->n, &n));
  PetscCall(PetscBLASIntCast(A->rmap->n, &k));
  if (!m || !n || !k) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(MatDenseGetArrayRead(A, &av));
  PetscCall(MatDenseGetArrayRead(B, &bv));
  PetscCall(MatDenseGetArrayWrite(C, &cv));
  PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda));
  PetscCall(MatDenseRestoreArrayRead(A, &av));
  PetscCall(MatDenseRestoreArrayRead(B, &bv));
  PetscCall(MatDenseRestoreArrayWrite(C, &cv));
  PetscCall(PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatProductSetFromOptions_SeqDense_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatProductSetFromOptions_SeqDense_AtB(Mat C)
{
  PetscFunctionBegin;
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_SeqDense_SeqDense;
  C->ops->productsymbolic          = MatProductSymbolic_AtB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatProductSetFromOptions_SeqDense_ABt(Mat C)
{
  PetscFunctionBegin;
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqDense_SeqDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
  case MATPRODUCT_AB:
    PetscCall(MatProductSetFromOptions_SeqDense_AB(C));
    break;
  case MATPRODUCT_AtB:
    PetscCall(MatProductSetFromOptions_SeqDense_AtB(C));
    break;
  case MATPRODUCT_ABt:
    PetscCall(MatProductSetFromOptions_SeqDense_ABt(C));
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRowMax_SeqDense(Mat A, Vec v, PetscInt idx[])
{
  Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
  PetscInt           i, j, m = A->rmap->n, n = A->cmap->n, p;
  PetscScalar       *x;
  const PetscScalar *aa;

  PetscFunctionBegin;
  PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  PetscCall(VecGetArray(v, &x));
  PetscCall(VecGetLocalSize(v, &p));
  PetscCall(MatDenseGetArrayRead(A, &aa));
  PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
  for (i = 0; i < m; i++) {
    x[i] = aa[i];
    if (idx) idx[i] = 0;
    for (j = 1; j < n; j++) {
      if (PetscRealPart(x[i]) < PetscRealPart(aa[i + a->lda * j])) {
        x[i] = aa[i + a->lda * j];
        if (idx) idx[i] = j;
      }
    }
  }
  PetscCall(MatDenseRestoreArrayRead(A, &aa));
  PetscCall(VecRestoreArray(v, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRowMaxAbs_SeqDense(Mat A, Vec v, PetscInt idx[])
{
  Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
  PetscInt           i, j, m = A->rmap->n, n = A->cmap->n, p;
  PetscScalar       *x;
  PetscReal          atmp;
  const PetscScalar *aa;

  PetscFunctionBegin;
  PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  PetscCall(VecGetArray(v, &x));
  PetscCall(VecGetLocalSize(v, &p));
  PetscCall(MatDenseGetArrayRead(A, &aa));
  PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
  for (i = 0; i < m; i++) {
    x[i] = PetscAbsScalar(aa[i]);
    for (j = 1; j < n; j++) {
      atmp = PetscAbsScalar(aa[i + a->lda * j]);
      if (PetscAbsScalar(x[i]) < atmp) {
        x[i] = atmp;
        if (idx) idx[i] = j;
      }
    }
  }
  PetscCall(MatDenseRestoreArrayRead(A, &aa));
  PetscCall(VecRestoreArray(v, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRowMin_SeqDense(Mat A, Vec v, PetscInt idx[])
{
  Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
  PetscInt           i, j, m = A->rmap->n, n = A->cmap->n, p;
  PetscScalar       *x;
  const PetscScalar *aa;

  PetscFunctionBegin;
  PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  PetscCall(MatDenseGetArrayRead(A, &aa));
  PetscCall(VecGetArray(v, &x));
  PetscCall(VecGetLocalSize(v, &p));
  PetscCheck(p == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
  for (i = 0; i < m; i++) {
    x[i] = aa[i];
    if (idx) idx[i] = 0;
    for (j = 1; j < n; j++) {
      if (PetscRealPart(x[i]) > PetscRealPart(aa[i + a->lda * j])) {
        x[i] = aa[i + a->lda * j];
        if (idx) idx[i] = j;
      }
    }
  }
  PetscCall(VecRestoreArray(v, &x));
  PetscCall(MatDenseRestoreArrayRead(A, &aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetColumnVector_SeqDense(Mat A, Vec v, PetscInt col)
{
  Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
  PetscScalar       *x;
  const PetscScalar *aa;

  PetscFunctionBegin;
  PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  PetscCall(MatDenseGetArrayRead(A, &aa));
  PetscCall(VecGetArray(v, &x));
  PetscCall(PetscArraycpy(x, aa + col * a->lda, A->rmap->n));
  PetscCall(VecRestoreArray(v, &x));
  PetscCall(MatDenseRestoreArrayRead(A, &aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt           i, j, m, n;
  const PetscScalar *a;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscArrayzero(reductions, n));
  PetscCall(MatDenseGetArrayRead(A, &a));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) {
      for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j] * a[j]);
      a = PetscSafePointerPlusOffset(a, m);
    }
  } else if (type == NORM_1) {
    for (i = 0; i < n; i++) {
      for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j]);
      a = PetscSafePointerPlusOffset(a, m);
    }
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < n; i++) {
      for (j = 0; j < m; j++) reductions[i] = PetscMax(PetscAbsScalar(a[j]), reductions[i]);
      a = PetscSafePointerPlusOffset(a, m);
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < n; i++) {
      for (j = 0; j < m; j++) reductions[i] += PetscRealPart(a[j]);
      a = PetscSafePointerPlusOffset(a, m);
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) {
      for (j = 0; j < m; j++) reductions[i] += PetscImaginaryPart(a[j]);
      a = PetscSafePointerPlusOffset(a, m);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  PetscCall(MatDenseRestoreArrayRead(A, &a));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetRandom_SeqDense(Mat x, PetscRandom rctx)
{
  PetscScalar *a;
  PetscInt     lda, m, n, i, j;

  PetscFunctionBegin;
  PetscCall(MatGetSize(x, &m, &n));
  PetscCall(MatDenseGetLDA(x, &lda));
  PetscCall(MatDenseGetArrayWrite(x, &a));
  for (j = 0; j < n; j++) {
    for (i = 0; i < m; i++) PetscCall(PetscRandomGetValue(rctx, a + j * lda + i));
  }
  PetscCall(MatDenseRestoreArrayWrite(x, &a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMissingDiagonal_SeqDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* vals is not const */
static PetscErrorCode MatDenseGetColumn_SeqDense(Mat A, PetscInt col, PetscScalar **vals)
{
  Mat_SeqDense *a = (Mat_SeqDense *)A->data;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  PetscCall(MatDenseGetArray(A, &v));
  *vals = v + col * a->lda;
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseRestoreColumn_SeqDense(Mat A, PetscScalar **vals)
{
  PetscFunctionBegin;
  if (vals) *vals = NULL; /* user cannot accidentally use the array later */
  PetscFunctionReturn(PETSC_SUCCESS);
}

static struct _MatOps MatOps_Values = {MatSetValues_SeqDense,
                                       MatGetRow_SeqDense,
                                       MatRestoreRow_SeqDense,
                                       MatMult_SeqDense,
                                       /*  4*/ MatMultAdd_SeqDense,
                                       MatMultTranspose_SeqDense,
                                       MatMultTransposeAdd_SeqDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       MatLUFactor_SeqDense,
                                       MatCholeskyFactor_SeqDense,
                                       MatSOR_SeqDense,
                                       MatTranspose_SeqDense,
                                       /* 15*/ MatGetInfo_SeqDense,
                                       MatEqual_SeqDense,
                                       MatGetDiagonal_SeqDense,
                                       MatDiagonalScale_SeqDense,
                                       MatNorm_SeqDense,
                                       /* 20*/ MatAssemblyBegin_SeqDense,
                                       MatAssemblyEnd_SeqDense,
                                       MatSetOption_SeqDense,
                                       MatZeroEntries_SeqDense,
                                       /* 24*/ MatZeroRows_SeqDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_SeqDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 34*/ MatDuplicate_SeqDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_SeqDense,
                                       MatCreateSubMatrices_SeqDense,
                                       NULL,
                                       MatGetValues_SeqDense,
                                       MatCopy_SeqDense,
                                       /* 44*/ MatGetRowMax_SeqDense,
                                       MatScale_SeqDense,
                                       MatShift_SeqDense,
                                       NULL,
                                       MatZeroRowsColumns_SeqDense,
                                       /* 49*/ MatSetRandom_SeqDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_SeqDense,
                                       MatDestroy_SeqDense,
                                       MatView_SeqDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ MatGetRowMaxAbs_SeqDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_SeqDense,
                                       MatIsSymmetric_SeqDense,
                                       MatIsHermitian_SeqDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_SeqDense_SeqDense,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       MatMatTransposeMultNumeric_SeqDense_SeqDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_SeqDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_SeqDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_SeqDense,
                                       MatImaginaryPart_SeqDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_SeqDense,
                                       MatGetColumnVector_SeqDense,
                                       MatMissingDiagonal_SeqDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       MatMultHermitianTranspose_SeqDense,
                                       MatMultHermitianTransposeAdd_SeqDense,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_SeqDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_SeqDense_SeqDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_SeqDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*155*/ NULL,
                                       NULL};

/*@
  MatCreateSeqDense - Creates a `MATSEQDENSE` that
  is stored in column major order (the usual Fortran format).

  Collective

  Input Parameters:
+ comm - MPI communicator, set to `PETSC_COMM_SELF`
. m    - number of rows
. n    - number of columns
- data - optional location of matrix data in column major order.  Use `NULL` for PETSc
         to control all matrix memory allocation.

  Output Parameter:
. A - the matrix

  Level: intermediate

  Note:
  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space.  Most users should
  set `data` = `NULL`.

  Developer Note:
  Many of the matrix operations for this variant use the BLAS and LAPACK routines.

.seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateSeqDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscScalar data[], Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, m, n));
  PetscCall(MatSetType(*A, MATSEQDENSE));
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatSeqDenseSetPreallocation - Sets the array used for storing the matrix elements of a `MATSEQDENSE` matrix

  Collective

  Input Parameters:
+ B    - the matrix
- data - the array (or `NULL`)

  Level: intermediate

  Note:
  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space.  Most users should
  need not call this routine.

.seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`, `MatDenseSetLDA()`
@*/
PetscErrorCode MatSeqDenseSetPreallocation(Mat B, PetscScalar data[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscTryMethod(B, "MatSeqDenseSetPreallocation_C", (Mat, PetscScalar[]), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSeqDenseSetPreallocation_SeqDense(Mat B, PetscScalar *data)
{
  Mat_SeqDense *b = (Mat_SeqDense *)B->data;

  PetscFunctionBegin;
  PetscCheck(!b->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  B->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  if (b->lda <= 0) PetscCall(PetscBLASIntCast(B->rmap->n, &b->lda));

  if (!data) { /* petsc-allocated storage */
    if (!b->user_alloc) PetscCall(PetscFree(b->v));
    PetscCall(PetscCalloc1((size_t)b->lda * B->cmap->n, &b->v));

    b->user_alloc = PETSC_FALSE;
  } else { /* user-allocated storage */
    if (!b->user_alloc) PetscCall(PetscFree(b->v));
    b->v          = data;
    b->user_alloc = PETSC_TRUE;
  }
  B->assembled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_SeqDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat                mat_elemental;
  const PetscScalar *array;
  PetscScalar       *v_colwise;
  PetscInt           M = A->rmap->N, N = A->cmap->N, i, j, k, *rows, *cols;

  PetscFunctionBegin;
  PetscCall(PetscMalloc3(M * N, &v_colwise, M, &rows, N, &cols));
  PetscCall(MatDenseGetArrayRead(A, &array));
  /* convert column-wise array into row-wise v_colwise, see MatSetValues_Elemental() */
  k = 0;
  for (j = 0; j < N; j++) {
    cols[j] = j;
    for (i = 0; i < M; i++) v_colwise[j * M + i] = array[k++];
  }
  for (i = 0; i < M; i++) rows[i] = i;
  PetscCall(MatDenseRestoreArrayRead(A, &array));

  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
  PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N));
  PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
  PetscCall(MatSetUp(mat_elemental));

  /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatSetValues(mat_elemental, M, rows, N, cols, v_colwise, ADD_VALUES));
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree3(v_colwise, rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

PetscErrorCode MatDenseSetLDA_SeqDense(Mat B, PetscInt lda)
{
  Mat_SeqDense *b = (Mat_SeqDense *)B->data;
  PetscBool     data;

  PetscFunctionBegin;
  data = (B->rmap->n > 0 && B->cmap->n > 0) ? (b->v ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE;
  PetscCheck(b->user_alloc || !data || b->lda == lda, PETSC_COMM_SELF, PETSC_ERR_ORDER, "LDA cannot be changed after allocation of internal storage");
  PetscCheck(lda >= B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "LDA %" PetscInt_FMT " must be at least matrix dimension %" PetscInt_FMT, lda, B->rmap->n);
  PetscCall(PetscBLASIntCast(lda, &b->lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscFunctionBegin;
  PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIDense(comm, inmat, n, scall, outmat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDenseCreateColumnVec_Private(Mat A, Vec *v)
{
  PetscBool   isstd, iskok, iscuda, iship;
  PetscMPIInt size;
#if PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)
  /* we pass the data of A, to prevent allocating needless GPU memory the first time VecCUPMPlaceArray is called. */
  const PetscScalar *a;
#endif

  PetscFunctionBegin;
  *v = NULL;
  PetscCall(PetscStrcmpAny(A->defaultvectype, &isstd, VECSTANDARD, VECSEQ, VECMPI, ""));
  PetscCall(PetscStrcmpAny(A->defaultvectype, &iskok, VECKOKKOS, VECSEQKOKKOS, VECMPIKOKKOS, ""));
  PetscCall(PetscStrcmpAny(A->defaultvectype, &iscuda, VECCUDA, VECSEQCUDA, VECMPICUDA, ""));
  PetscCall(PetscStrcmpAny(A->defaultvectype, &iship, VECHIP, VECSEQHIP, VECMPIHIP, ""));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (isstd) {
    if (size > 1) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, v));
    else PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, v));
  } else if (iskok) {
    PetscCheck(PetscDefined(HAVE_KOKKOS_KERNELS), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using KOKKOS kernels support");
#if PetscDefined(HAVE_KOKKOS_KERNELS)
    if (size > 1) PetscCall(VecCreateMPIKokkosWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, v));
    else PetscCall(VecCreateSeqKokkosWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, v));
#endif
  } else if (iscuda) {
    PetscCheck(PetscDefined(HAVE_CUDA), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using CUDA support");
#if PetscDefined(HAVE_CUDA)
    PetscCall(MatDenseCUDAGetArrayRead(A, &a));
    if (size > 1) PetscCall(VecCreateMPICUDAWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, a, v));
    else PetscCall(VecCreateSeqCUDAWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, a, v));
#endif
  } else if (iship) {
    PetscCheck(PetscDefined(HAVE_HIP), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Reconfigure using HIP support");
#if PetscDefined(HAVE_HIP)
    PetscCall(MatDenseHIPGetArrayRead(A, &a));
    if (size > 1) PetscCall(VecCreateMPIHIPWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, a, v));
    else PetscCall(VecCreateSeqHIPWithArrays(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, a, v));
#endif
  }
  PetscCheck(*v, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not coded for type %s", A->defaultvectype);
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDenseGetColumnVec_SeqDense(Mat A, PetscInt col, Vec *v)
{
  Mat_SeqDense *a = (Mat_SeqDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetArray(A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDenseRestoreColumnVec_SeqDense(Mat A, PetscInt col, Vec *v)
{
  Mat_SeqDense *a = (Mat_SeqDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArray(A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDenseGetColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v)
{
  Mat_SeqDense *a = (Mat_SeqDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetArrayRead(A, &a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)a->lda)));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDenseRestoreColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v)
{
  Mat_SeqDense *a = (Mat_SeqDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDenseGetColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v)
{
  Mat_SeqDense *a = (Mat_SeqDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetArrayWrite(A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)a->lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDenseRestoreColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v)
{
  Mat_SeqDense *a = (Mat_SeqDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDenseGetSubMatrix_SeqDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_SeqDense *a = (Mat_SeqDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (a->cmat && (cend - cbegin != a->cmat->cmap->N || rend - rbegin != a->cmat->rmap->N)) PetscCall(MatDestroy(&a->cmat));
  if (!a->cmat) {
    PetscCall(MatCreateDense(PetscObjectComm((PetscObject)A), rend - rbegin, PETSC_DECIDE, rend - rbegin, cend - cbegin, PetscSafePointerPlusOffset(a->v, rbegin + (size_t)cbegin * a->lda), &a->cmat));
  } else {
    PetscCall(MatDensePlaceArray(a->cmat, PetscSafePointerPlusOffset(a->v, rbegin + (size_t)cbegin * a->lda)));
  }
  PetscCall(MatDenseSetLDA(a->cmat, a->lda));
  a->matinuse = cbegin + 1;
  *v          = a->cmat;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDenseRestoreSubMatrix_SeqDense(Mat A, Mat *v)
{
  Mat_SeqDense *a = (Mat_SeqDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  PetscCall(MatDenseResetArray(a->cmat));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATSEQDENSE - MATSEQDENSE = "seqdense" - A matrix type to be used for sequential dense matrices.

   Options Database Key:
. -mat_type seqdense - sets the matrix type to `MATSEQDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MatCreateSeqDense()`
M*/
PetscErrorCode MatCreate_SeqDense(Mat B)
{
  Mat_SeqDense *b;
  PetscMPIInt   size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");

  PetscCall(PetscNew(&b));
  B->data   = (void *)b;
  B->ops[0] = MatOps_Values;

  b->roworiented = PETSC_TRUE;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatQRFactor_C", MatQRFactor_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetLDA_C", MatDenseGetLDA_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseSetLDA_C", MatDenseSetLDA_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArray_C", MatDenseGetArray_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArray_C", MatDenseRestoreArray_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDensePlaceArray_C", MatDensePlaceArray_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseResetArray_C", MatDenseResetArray_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseReplaceArray_C", MatDenseReplaceArray_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayRead_C", MatDenseGetArray_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayRead_C", MatDenseRestoreArray_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayWrite_C", MatDenseGetArray_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArray_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqaij_C", MatConvert_SeqDense_SeqAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_elemental_C", MatConvert_SeqDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensecuda_C", MatConvert_SeqDense_SeqDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", MatProductSetFromOptions_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdense_C", MatProductSetFromOptions_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensecuda_C", MatProductSetFromOptions_SeqDense));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensehip_C", MatConvert_SeqDense_SeqDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", MatProductSetFromOptions_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdense_C", MatProductSetFromOptions_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensehip_C", MatProductSetFromOptions_SeqDense));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqDenseSetPreallocation_C", MatSeqDenseSetPreallocation_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdense_C", MatProductSetFromOptions_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqsbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense));

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumn_C", MatDenseGetColumn_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultAddColumnRange_C", MatMultAddColumnRange_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_SeqDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseGetColumn - gives access to a column of a dense matrix. This is only the local part of the column. You MUST call `MatDenseRestoreColumn()` to avoid memory bleeding.

  Not Collective

  Input Parameters:
+ A   - a `MATSEQDENSE` or `MATMPIDENSE` matrix
- col - column index

  Output Parameter:
. vals - pointer to the data

  Level: intermediate

  Note:
  Use `MatDenseGetColumnVec()` to get access to a column of a `MATDENSE` treated as a `Vec`

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseRestoreColumn()`, `MatDenseGetColumnVec()`
@*/
PetscErrorCode MatDenseGetColumn(Mat A, PetscInt col, PetscScalar *vals[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidLogicalCollectiveInt(A, col, 2);
  PetscAssertPointer(vals, 3);
  PetscUseMethod(A, "MatDenseGetColumn_C", (Mat, PetscInt, PetscScalar **), (A, col, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseRestoreColumn - returns access to a column of a `MATDENSE` matrix which is returned by `MatDenseGetColumn()`.

  Not Collective

  Input Parameters:
+ A    - a `MATSEQDENSE` or `MATMPIDENSE` matrix
- vals - pointer to the data (may be `NULL`)

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetColumn()`
@*/
PetscErrorCode MatDenseRestoreColumn(Mat A, PetscScalar *vals[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(vals, 2);
  PetscUseMethod(A, "MatDenseRestoreColumn_C", (Mat, PetscScalar **), (A, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseGetColumnVec - Gives read-write access to a column of a `MATDENSE` matrix, represented as a `Vec`.

  Collective

  Input Parameters:
+ A   - the `Mat` object
- col - the column index

  Output Parameter:
. v - the vector

  Level: intermediate

  Notes:
  The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVec()` when the vector is no longer needed.

  Use `MatDenseGetColumnVecRead()` to obtain read-only access or `MatDenseGetColumnVecWrite()` for write-only access.

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`, `MatDenseGetColumn()`
@*/
PetscErrorCode MatDenseGetColumnVec(Mat A, PetscInt col, Vec *v)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidType(A, 1);
  PetscValidLogicalCollectiveInt(A, col, 2);
  PetscAssertPointer(v, 3);
  PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated");
  PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N);
  PetscUseMethod(A, "MatDenseGetColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseRestoreColumnVec - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVec()`.

  Collective

  Input Parameters:
+ A   - the `Mat` object
. col - the column index
- v   - the `Vec` object (may be `NULL`)

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`
@*/
PetscErrorCode MatDenseRestoreColumnVec(Mat A, PetscInt col, Vec *v)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidType(A, 1);
  PetscValidLogicalCollectiveInt(A, col, 2);
  PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated");
  PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N);
  PetscUseMethod(A, "MatDenseRestoreColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseGetColumnVecRead - Gives read-only access to a column of a dense matrix, represented as a `Vec`.

  Collective

  Input Parameters:
+ A   - the `Mat` object
- col - the column index

  Output Parameter:
. v - the vector

  Level: intermediate

  Notes:
  The vector is owned by PETSc and users cannot modify it.

  Users need to call `MatDenseRestoreColumnVecRead()` when the vector is no longer needed.

  Use `MatDenseGetColumnVec()` to obtain read-write access or `MatDenseGetColumnVecWrite()` for write-only access.

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`
@*/
PetscErrorCode MatDenseGetColumnVecRead(Mat A, PetscInt col, Vec *v)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidType(A, 1);
  PetscValidLogicalCollectiveInt(A, col, 2);
  PetscAssertPointer(v, 3);
  PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated");
  PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N);
  PetscUseMethod(A, "MatDenseGetColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseRestoreColumnVecRead - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVecRead()`.

  Collective

  Input Parameters:
+ A   - the `Mat` object
. col - the column index
- v   - the `Vec` object (may be `NULL`)

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecWrite()`
@*/
PetscErrorCode MatDenseRestoreColumnVecRead(Mat A, PetscInt col, Vec *v)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidType(A, 1);
  PetscValidLogicalCollectiveInt(A, col, 2);
  PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated");
  PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N);
  PetscUseMethod(A, "MatDenseRestoreColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseGetColumnVecWrite - Gives write-only access to a column of a dense matrix, represented as a `Vec`.

  Collective

  Input Parameters:
+ A   - the `Mat` object
- col - the column index

  Output Parameter:
. v - the vector

  Level: intermediate

  Notes:
  The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVecWrite()` when the vector is no longer needed.

  Use `MatDenseGetColumnVec()` to obtain read-write access or `MatDenseGetColumnVecRead()` for read-only access.

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`
@*/
PetscErrorCode MatDenseGetColumnVecWrite(Mat A, PetscInt col, Vec *v)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidType(A, 1);
  PetscValidLogicalCollectiveInt(A, col, 2);
  PetscAssertPointer(v, 3);
  PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated");
  PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N);
  PetscUseMethod(A, "MatDenseGetColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseRestoreColumnVecWrite - Returns access to a column of a dense matrix obtained from `MatDenseGetColumnVecWrite()`.

  Collective

  Input Parameters:
+ A   - the `Mat` object
. col - the column index
- v   - the `Vec` object (may be `NULL`)

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`
@*/
PetscErrorCode MatDenseRestoreColumnVecWrite(Mat A, PetscInt col, Vec *v)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidType(A, 1);
  PetscValidLogicalCollectiveInt(A, col, 2);
  PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated");
  PetscCheck(col >= 0 && col < A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid col %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT ")", col, A->cmap->N);
  PetscUseMethod(A, "MatDenseRestoreColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseGetSubMatrix - Gives access to a block of rows and columns of a dense matrix, represented as a `Mat`.

  Collective

  Input Parameters:
+ A      - the `Mat` object
. rbegin - the first global row index in the block (if `PETSC_DECIDE`, is 0)
. rend   - the global row index past the last one in the block (if `PETSC_DECIDE`, is `M`)
. cbegin - the first global column index in the block (if `PETSC_DECIDE`, is 0)
- cend   - the global column index past the last one in the block (if `PETSC_DECIDE`, is `N`)

  Output Parameter:
. v - the matrix

  Level: intermediate

  Notes:
  The matrix is owned by PETSc. Users need to call `MatDenseRestoreSubMatrix()` when the matrix is no longer needed.

  The output matrix is not redistributed by PETSc, so depending on the values of `rbegin` and `rend`, some processes may have no local rows.

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreSubMatrix()`
@*/
PetscErrorCode MatDenseGetSubMatrix(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidType(A, 1);
  PetscValidLogicalCollectiveInt(A, rbegin, 2);
  PetscValidLogicalCollectiveInt(A, rend, 3);
  PetscValidLogicalCollectiveInt(A, cbegin, 4);
  PetscValidLogicalCollectiveInt(A, cend, 5);
  PetscAssertPointer(v, 6);
  if (rbegin == PETSC_DECIDE) rbegin = 0;
  if (rend == PETSC_DECIDE) rend = A->rmap->N;
  if (cbegin == PETSC_DECIDE) cbegin = 0;
  if (cend == PETSC_DECIDE) cend = A->cmap->N;
  PetscCheck(A->preallocated, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Matrix not preallocated");
  PetscCheck(rbegin >= 0 && rbegin <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", rbegin, A->rmap->N);
  PetscCheck(rend >= rbegin && rend <= A->rmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid rend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", rend, rbegin, A->rmap->N);
  PetscCheck(cbegin >= 0 && cbegin <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cbegin %" PetscInt_FMT ", should be in [0,%" PetscInt_FMT "]", cbegin, A->cmap->N);
  PetscCheck(cend >= cbegin && cend <= A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid cend %" PetscInt_FMT ", should be in [%" PetscInt_FMT ",%" PetscInt_FMT "]", cend, cbegin, A->cmap->N);
  PetscUseMethod(A, "MatDenseGetSubMatrix_C", (Mat, PetscInt, PetscInt, PetscInt, PetscInt, Mat *), (A, rbegin, rend, cbegin, cend, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseRestoreSubMatrix - Returns access to a block of columns of a dense matrix obtained from `MatDenseGetSubMatrix()`.

  Collective

  Input Parameters:
+ A - the `Mat` object
- v - the `Mat` object (may be `NULL`)

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseGetSubMatrix()`
@*/
PetscErrorCode MatDenseRestoreSubMatrix(Mat A, Mat *v)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidType(A, 1);
  PetscAssertPointer(v, 2);
  PetscUseMethod(A, "MatDenseRestoreSubMatrix_C", (Mat, Mat *), (A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscblaslapack.h>
#include <petsc/private/kernels/blockinvert.h>

PetscErrorCode MatSeqDenseInvert(Mat A)
{
  PetscInt        m;
  const PetscReal shift = 0.0;
  PetscBool       allowzeropivot, zeropivotdetected = PETSC_FALSE;
  PetscScalar    *values;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscCall(MatDenseGetArray(A, &values));
  PetscCall(MatGetLocalSize(A, &m, NULL));
  allowzeropivot = PetscNot(A->erroriffailure);
  /* factor and invert each block */
  switch (m) {
  case 1:
    values[0] = (PetscScalar)1.0 / (values[0] + shift);
    break;
  case 2:
    PetscCall(PetscKernel_A_gets_inverse_A_2(values, shift, allowzeropivot, &zeropivotdetected));
    if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
    break;
  case 3:
    PetscCall(PetscKernel_A_gets_inverse_A_3(values, shift, allowzeropivot, &zeropivotdetected));
    if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
    break;
  case 4:
    PetscCall(PetscKernel_A_gets_inverse_A_4(values, shift, allowzeropivot, &zeropivotdetected));
    if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
    break;
  case 5: {
    PetscScalar work[25];
    PetscInt    ipvt[5];

    PetscCall(PetscKernel_A_gets_inverse_A_5(values, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
    if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
  } break;
  case 6:
    PetscCall(PetscKernel_A_gets_inverse_A_6(values, shift, allowzeropivot, &zeropivotdetected));
    if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
    break;
  case 7:
    PetscCall(PetscKernel_A_gets_inverse_A_7(values, shift, allowzeropivot, &zeropivotdetected));
    if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
    break;
  default: {
    PetscInt    *v_pivots, *IJ, j;
    PetscScalar *v_work;

    PetscCall(PetscMalloc3(m, &v_work, m, &v_pivots, m, &IJ));
    for (j = 0; j < m; j++) IJ[j] = j;
    PetscCall(PetscKernel_A_gets_inverse_A(m, values, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
    if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
    PetscCall(PetscFree3(v_work, v_pivots, IJ));
  }
  }
  PetscCall(MatDenseRestoreArray(A, &values));
  PetscFunctionReturn(PETSC_SUCCESS);
}
