#include <petsc/private/pcpatchimpl.h> /*I "petscpc.h" I*/
#include <petsc/private/kspimpl.h>     /* For ksp->setfromoptionscalled */
#include <petsc/private/vecimpl.h>     /* For vec->map */
#include <petsc/private/dmpleximpl.h>  /* For DMPlexComputeJacobian_Patch_Internal() */
#include <petscsf.h>
#include <petscbt.h>
#include <petscds.h>
#include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/

PetscBool  PCPatchcite       = PETSC_FALSE;
const char PCPatchCitation[] = "@article{FarrellKnepleyWechsungMitchell2020,\n"
                               "title   = {{PCPATCH}: software for the topological construction of multigrid relaxation methods},\n"
                               "author  = {Patrick E Farrell and Matthew G Knepley and Lawrence Mitchell and Florian Wechsung},\n"
                               "journal = {ACM Transaction on Mathematical Software},\n"
                               "eprint  = {http://arxiv.org/abs/1912.08516},\n"
                               "volume  = {47},\n"
                               "number  = {3},\n"
                               "pages   = {1--22},\n"
                               "year    = {2021},\n"
                               "petsc_uses={KSP,DMPlex}\n}\n";

PetscLogEvent PC_Patch_CreatePatches, PC_Patch_ComputeOp, PC_Patch_Solve, PC_Patch_Apply, PC_Patch_Prealloc;

static inline PetscErrorCode ObjectView(PetscObject obj, PetscViewer viewer, PetscViewerFormat format)
{
  PetscCall(PetscViewerPushFormat(viewer, format));
  PetscCall(PetscObjectView(obj, viewer));
  PetscCall(PetscViewerPopFormat(viewer));
  return PETSC_SUCCESS;
}

static PetscErrorCode PCPatchConstruct_Star(void *vpatch, DM dm, PetscInt point, PetscHSetI ht)
{
  PetscInt  starSize;
  PetscInt *star = NULL, si;

  PetscFunctionBegin;
  PetscCall(PetscHSetIClear(ht));
  /* To start with, add the point we care about */
  PetscCall(PetscHSetIAdd(ht, point));
  /* Loop over all the points that this point connects to */
  PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_FALSE, &starSize, &star));
  for (si = 0; si < starSize * 2; si += 2) PetscCall(PetscHSetIAdd(ht, star[si]));
  PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_FALSE, &starSize, &star));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCPatchConstruct_Vanka(void *vpatch, DM dm, PetscInt point, PetscHSetI ht)
{
  PC_PATCH *patch = (PC_PATCH *)vpatch;
  PetscInt  starSize;
  PetscInt *star         = NULL;
  PetscBool shouldIgnore = PETSC_FALSE;
  PetscInt  cStart, cEnd, iStart, iEnd, si;

  PetscFunctionBegin;
  PetscCall(PetscHSetIClear(ht));
  /* To start with, add the point we care about */
  PetscCall(PetscHSetIAdd(ht, point));
  /* Should we ignore any points of a certain dimension? */
  if (patch->vankadim >= 0) {
    shouldIgnore = PETSC_TRUE;
    PetscCall(DMPlexGetDepthStratum(dm, patch->vankadim, &iStart, &iEnd));
  }
  PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd));
  /* Loop over all the cells that this point connects to */
  PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_FALSE, &starSize, &star));
  for (si = 0; si < starSize * 2; si += 2) {
    const PetscInt cell = star[si];
    PetscInt       closureSize;
    PetscInt      *closure = NULL, ci;

    if (cell < cStart || cell >= cEnd) continue;
    /* now loop over all entities in the closure of that cell */
    PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &closureSize, &closure));
    for (ci = 0; ci < closureSize * 2; ci += 2) {
      const PetscInt newpoint = closure[ci];

      /* We've been told to ignore entities of this type.*/
      if (shouldIgnore && newpoint >= iStart && newpoint < iEnd) continue;
      PetscCall(PetscHSetIAdd(ht, newpoint));
    }
    PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &closureSize, &closure));
  }
  PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_FALSE, &starSize, &star));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCPatchConstruct_Pardecomp(void *vpatch, DM dm, PetscInt point, PetscHSetI ht)
{
  PC_PATCH       *patch   = (PC_PATCH *)vpatch;
  DMLabel         ghost   = NULL;
  const PetscInt *leaves  = NULL;
  PetscInt        nleaves = 0, pStart, pEnd, loc;
  PetscBool       isFiredrake;
  PetscBool       flg;
  PetscInt        starSize;
  PetscInt       *star = NULL;
  PetscInt        opoint, overlapi;

  PetscFunctionBegin;
  PetscCall(PetscHSetIClear(ht));

  PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));

  PetscCall(DMHasLabel(dm, "pyop2_ghost", &isFiredrake));
  if (isFiredrake) {
    PetscCall(DMGetLabel(dm, "pyop2_ghost", &ghost));
    PetscCall(DMLabelCreateIndex(ghost, pStart, pEnd));
  } else {
    PetscSF sf;
    PetscCall(DMGetPointSF(dm, &sf));
    PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &leaves, NULL));
    nleaves = PetscMax(nleaves, 0);
  }

  for (opoint = pStart; opoint < pEnd; ++opoint) {
    if (ghost) PetscCall(DMLabelHasPoint(ghost, opoint, &flg));
    else {
      PetscCall(PetscFindInt(opoint, nleaves, leaves, &loc));
      flg = loc >= 0 ? PETSC_TRUE : PETSC_FALSE;
    }
    /* Not an owned entity, don't make a cell patch. */
    if (flg) continue;
    PetscCall(PetscHSetIAdd(ht, opoint));
  }

  /* Now build the overlap for the patch */
  for (overlapi = 0; overlapi < patch->pardecomp_overlap; ++overlapi) {
    PetscInt  index    = 0;
    PetscInt *htpoints = NULL;
    PetscInt  htsize;
    PetscInt  i;

    PetscCall(PetscHSetIGetSize(ht, &htsize));
    PetscCall(PetscMalloc1(htsize, &htpoints));
    PetscCall(PetscHSetIGetElems(ht, &index, htpoints));

    for (i = 0; i < htsize; ++i) {
      PetscInt hpoint = htpoints[i];
      PetscInt si;

      PetscCall(DMPlexGetTransitiveClosure(dm, hpoint, PETSC_FALSE, &starSize, &star));
      for (si = 0; si < starSize * 2; si += 2) {
        const PetscInt starp = star[si];
        PetscInt       closureSize;
        PetscInt      *closure = NULL, ci;

        /* now loop over all entities in the closure of starp */
        PetscCall(DMPlexGetTransitiveClosure(dm, starp, PETSC_TRUE, &closureSize, &closure));
        for (ci = 0; ci < closureSize * 2; ci += 2) {
          const PetscInt closstarp = closure[ci];
          PetscCall(PetscHSetIAdd(ht, closstarp));
        }
        PetscCall(DMPlexRestoreTransitiveClosure(dm, starp, PETSC_TRUE, &closureSize, &closure));
      }
      PetscCall(DMPlexRestoreTransitiveClosure(dm, hpoint, PETSC_FALSE, &starSize, &star));
    }
    PetscCall(PetscFree(htpoints));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* The user's already set the patches in patch->userIS. Build the hash tables */
static PetscErrorCode PCPatchConstruct_User(void *vpatch, DM dm, PetscInt point, PetscHSetI ht)
{
  PC_PATCH       *patch   = (PC_PATCH *)vpatch;
  IS              patchis = patch->userIS[point];
  PetscInt        n;
  const PetscInt *patchdata;
  PetscInt        pStart, pEnd, i;

  PetscFunctionBegin;
  PetscCall(PetscHSetIClear(ht));
  PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
  PetscCall(ISGetLocalSize(patchis, &n));
  PetscCall(ISGetIndices(patchis, &patchdata));
  for (i = 0; i < n; ++i) {
    const PetscInt ownedpoint = patchdata[i];

    PetscCheck(ownedpoint >= pStart && ownedpoint < pEnd, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Mesh point %" PetscInt_FMT " was not in [%" PetscInt_FMT ", %" PetscInt_FMT ")", ownedpoint, pStart, pEnd);
    PetscCall(PetscHSetIAdd(ht, ownedpoint));
  }
  PetscCall(ISRestoreIndices(patchis, &patchdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCPatchCreateDefaultSF_Private(PC pc, PetscInt n, const PetscSF *sf, const PetscInt *bs)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;

  PetscFunctionBegin;
  if (n == 1 && bs[0] == 1) {
    patch->sectionSF = sf[0];
    PetscCall(PetscObjectReference((PetscObject)patch->sectionSF));
  } else {
    PetscInt     allRoots = 0, allLeaves = 0;
    PetscInt     leafOffset    = 0;
    PetscInt    *ilocal        = NULL;
    PetscSFNode *iremote       = NULL;
    PetscInt    *remoteOffsets = NULL;
    PetscInt     index         = 0;
    PetscHMapI   rankToIndex;
    PetscInt     numRanks = 0;
    PetscSFNode *remote   = NULL;
    PetscSF      rankSF;
    PetscInt    *ranks   = NULL;
    PetscInt    *offsets = NULL;
    MPI_Datatype contig;
    PetscHSetI   ranksUniq;
    PetscMPIInt  in;

    /* First figure out how many dofs there are in the concatenated numbering.
       allRoots: number of owned global dofs;
       allLeaves: number of visible dofs (global + ghosted).
    */
    for (PetscInt i = 0; i < n; ++i) {
      PetscInt nroots, nleaves;

      PetscCall(PetscSFGetGraph(sf[i], &nroots, &nleaves, NULL, NULL));
      allRoots += nroots * bs[i];
      allLeaves += nleaves * bs[i];
    }
    PetscCall(PetscMalloc1(allLeaves, &ilocal));
    PetscCall(PetscMalloc1(allLeaves, &iremote));
    /* Now build an SF that just contains process connectivity. */
    PetscCall(PetscHSetICreate(&ranksUniq));
    for (PetscInt i = 0; i < n; ++i) {
      const PetscMPIInt *ranks = NULL;
      PetscMPIInt        nranks;

      PetscCall(PetscSFSetUp(sf[i]));
      PetscCall(PetscSFGetRootRanks(sf[i], &nranks, &ranks, NULL, NULL, NULL));
      /* These are all the ranks who communicate with me. */
      for (PetscMPIInt j = 0; j < nranks; ++j) PetscCall(PetscHSetIAdd(ranksUniq, (PetscInt)ranks[j]));
    }
    PetscCall(PetscHSetIGetSize(ranksUniq, &numRanks));
    PetscCall(PetscMalloc1(numRanks, &remote));
    PetscCall(PetscMalloc1(numRanks, &ranks));
    PetscCall(PetscHSetIGetElems(ranksUniq, &index, ranks));

    PetscCall(PetscHMapICreate(&rankToIndex));
    for (PetscInt i = 0; i < numRanks; ++i) {
      remote[i].rank  = ranks[i];
      remote[i].index = 0;
      PetscCall(PetscHMapISet(rankToIndex, ranks[i], i));
    }
    PetscCall(PetscFree(ranks));
    PetscCall(PetscHSetIDestroy(&ranksUniq));
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &rankSF));
    PetscCall(PetscSFSetGraph(rankSF, 1, numRanks, NULL, PETSC_OWN_POINTER, remote, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(rankSF));
    /* OK, use it to communicate the root offset on the remote processes for each subspace. */
    PetscCall(PetscMalloc1(n, &offsets));
    PetscCall(PetscMalloc1(n * numRanks, &remoteOffsets));

    offsets[0] = 0;
    for (PetscInt i = 1; i < n; ++i) {
      PetscInt nroots;

      PetscCall(PetscSFGetGraph(sf[i - 1], &nroots, NULL, NULL, NULL));
      offsets[i] = offsets[i - 1] + nroots * bs[i - 1];
    }
    /* Offsets are the offsets on the current process of the global dof numbering for the subspaces. */
    PetscCall(PetscMPIIntCast(n, &in));
    PetscCallMPI(MPI_Type_contiguous(in, MPIU_INT, &contig));
    PetscCallMPI(MPI_Type_commit(&contig));

    PetscCall(PetscSFBcastBegin(rankSF, contig, offsets, remoteOffsets, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(rankSF, contig, offsets, remoteOffsets, MPI_REPLACE));
    PetscCallMPI(MPI_Type_free(&contig));
    PetscCall(PetscFree(offsets));
    PetscCall(PetscSFDestroy(&rankSF));
    /* Now remoteOffsets contains the offsets on the remote
      processes who communicate with me.  So now we can
      concatenate the list of SFs into a single one. */
    index = 0;
    for (PetscInt i = 0; i < n; ++i) {
      const PetscSFNode *remote = NULL;
      const PetscInt    *local  = NULL;
      PetscInt           nroots, nleaves, j;

      PetscCall(PetscSFGetGraph(sf[i], &nroots, &nleaves, &local, &remote));
      for (j = 0; j < nleaves; ++j) {
        PetscInt rank = remote[j].rank;
        PetscInt idx, rootOffset, k;

        PetscCall(PetscHMapIGet(rankToIndex, rank, &idx));
        PetscCheck(idx != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Didn't find rank, huh?");
        /* Offset on given rank for ith subspace */
        rootOffset = remoteOffsets[n * idx + i];
        for (k = 0; k < bs[i]; ++k) {
          ilocal[index]        = (local ? local[j] : j) * bs[i] + k + leafOffset;
          iremote[index].rank  = remote[j].rank;
          iremote[index].index = remote[j].index * bs[i] + k + rootOffset;
          ++index;
        }
      }
      leafOffset += nleaves * bs[i];
    }
    PetscCall(PetscHMapIDestroy(&rankToIndex));
    PetscCall(PetscFree(remoteOffsets));
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &patch->sectionSF));
    PetscCall(PetscSFSetGraph(patch->sectionSF, allRoots, allLeaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* TODO: Docs */
static PetscErrorCode PCPatchGetIgnoreDim(PC pc, PetscInt *dim)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;

  PetscFunctionBegin;
  *dim = patch->ignoredim;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* TODO: Docs */
PetscErrorCode PCPatchSetSaveOperators(PC pc, PetscBool flg)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;

  PetscFunctionBegin;
  patch->save_operators = flg;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* TODO: Docs */
PetscErrorCode PCPatchGetSaveOperators(PC pc, PetscBool *flg)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;

  PetscFunctionBegin;
  *flg = patch->save_operators;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* TODO: Docs */
PetscErrorCode PCPatchSetPrecomputeElementTensors(PC pc, PetscBool flg)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;

  PetscFunctionBegin;
  patch->precomputeElementTensors = flg;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* TODO: Docs */
PetscErrorCode PCPatchGetPrecomputeElementTensors(PC pc, PetscBool *flg)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;

  PetscFunctionBegin;
  *flg = patch->precomputeElementTensors;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* TODO: Docs */
PetscErrorCode PCPatchSetPartitionOfUnity(PC pc, PetscBool flg)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;

  PetscFunctionBegin;
  patch->partition_of_unity = flg;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* TODO: Docs */
PetscErrorCode PCPatchGetPartitionOfUnity(PC pc, PetscBool *flg)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;

  PetscFunctionBegin;
  *flg = patch->partition_of_unity;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* TODO: Docs */
static PetscErrorCode PCPatchSetLocalComposition(PC pc, PCCompositeType type)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;

  PetscFunctionBegin;
  PetscCheck(type == PC_COMPOSITE_ADDITIVE || type == PC_COMPOSITE_MULTIPLICATIVE, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Only supports additive or multiplicative as the local type");
  patch->local_composition_type = type;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* TODO: Docs */
PetscErrorCode PCPatchGetSubKSP(PC pc, PetscInt *npatch, KSP **ksp)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;

  PetscFunctionBegin;
  PetscCheck(pc->setupcalled, PetscObjectComm((PetscObject)pc), PETSC_ERR_ORDER, "Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");
  PetscCall(PetscMalloc1(patch->npatch, ksp));
  for (PetscInt i = 0; i < patch->npatch; ++i) (*ksp)[i] = (KSP)patch->solver[i];
  if (npatch) *npatch = patch->npatch;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* TODO: Docs */
PetscErrorCode PCPatchSetSubMatType(PC pc, MatType sub_mat_type)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;

  PetscFunctionBegin;
  if (patch->sub_mat_type) PetscCall(PetscFree(patch->sub_mat_type));
  PetscCall(PetscStrallocpy(sub_mat_type, (char **)&patch->sub_mat_type));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* TODO: Docs */
PetscErrorCode PCPatchGetSubMatType(PC pc, MatType *sub_mat_type)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;

  PetscFunctionBegin;
  *sub_mat_type = patch->sub_mat_type;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* TODO: Docs */
PetscErrorCode PCPatchSetCellNumbering(PC pc, PetscSection cellNumbering)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;

  PetscFunctionBegin;
  patch->cellNumbering = cellNumbering;
  PetscCall(PetscObjectReference((PetscObject)cellNumbering));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* TODO: Docs */
PetscErrorCode PCPatchGetCellNumbering(PC pc, PetscSection *cellNumbering)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;

  PetscFunctionBegin;
  *cellNumbering = patch->cellNumbering;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* TODO: Docs */
PetscErrorCode PCPatchSetConstructType(PC pc, PCPatchConstructType ctype, PetscErrorCode (*func)(PC, PetscInt *, IS **, IS *, void *), void *ctx)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;

  PetscFunctionBegin;
  patch->ctype = ctype;
  switch (ctype) {
  case PC_PATCH_STAR:
    patch->user_patches     = PETSC_FALSE;
    patch->patchconstructop = PCPatchConstruct_Star;
    break;
  case PC_PATCH_VANKA:
    patch->user_patches     = PETSC_FALSE;
    patch->patchconstructop = PCPatchConstruct_Vanka;
    break;
  case PC_PATCH_PARDECOMP:
    patch->user_patches     = PETSC_FALSE;
    patch->patchconstructop = PCPatchConstruct_Pardecomp;
    break;
  case PC_PATCH_USER:
  case PC_PATCH_PYTHON:
    patch->user_patches     = PETSC_TRUE;
    patch->patchconstructop = PCPatchConstruct_User;
    if (func) {
      patch->userpatchconstructionop = func;
      patch->userpatchconstructctx   = ctx;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Unknown patch construction type %" PetscInt_FMT, (PetscInt)patch->ctype);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* TODO: Docs */
PetscErrorCode PCPatchGetConstructType(PC pc, PCPatchConstructType *ctype, PetscErrorCode (**func)(PC, PetscInt *, IS **, IS *, void *), void **ctx)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;

  PetscFunctionBegin;
  *ctype = patch->ctype;
  switch (patch->ctype) {
  case PC_PATCH_STAR:
  case PC_PATCH_VANKA:
  case PC_PATCH_PARDECOMP:
    break;
  case PC_PATCH_USER:
  case PC_PATCH_PYTHON:
    *func = patch->userpatchconstructionop;
    *ctx  = patch->userpatchconstructctx;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Unknown patch construction type %" PetscInt_FMT, (PetscInt)patch->ctype);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* TODO: Docs */
PetscErrorCode PCPatchSetDiscretisationInfo(PC pc, PetscInt nsubspaces, DM *dms, PetscInt *bs, PetscInt *nodesPerCell, const PetscInt **cellNodeMap, const PetscInt *subspaceOffsets, PetscInt numGhostBcs, const PetscInt *ghostBcNodes, PetscInt numGlobalBcs, const PetscInt *globalBcNodes)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;
  DM        dm, plex;
  PetscSF  *sfs;
  PetscInt  cStart, cEnd, i, j;

  PetscFunctionBegin;
  PetscCall(PCGetDM(pc, &dm));
  PetscCall(DMConvert(dm, DMPLEX, &plex));
  dm = plex;
  PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd));
  PetscCall(PetscMalloc1(nsubspaces, &sfs));
  PetscCall(PetscMalloc1(nsubspaces, &patch->dofSection));
  PetscCall(PetscMalloc1(nsubspaces, &patch->bs));
  PetscCall(PetscMalloc1(nsubspaces, &patch->nodesPerCell));
  PetscCall(PetscMalloc1(nsubspaces, &patch->cellNodeMap));
  PetscCall(PetscMalloc1(nsubspaces + 1, &patch->subspaceOffsets));

  patch->nsubspaces       = nsubspaces;
  patch->totalDofsPerCell = 0;
  for (i = 0; i < nsubspaces; ++i) {
    PetscCall(DMGetLocalSection(dms[i], &patch->dofSection[i]));
    PetscCall(PetscObjectReference((PetscObject)patch->dofSection[i]));
    PetscCall(DMGetSectionSF(dms[i], &sfs[i]));
    patch->bs[i]           = bs[i];
    patch->nodesPerCell[i] = nodesPerCell[i];
    patch->totalDofsPerCell += nodesPerCell[i] * bs[i];
    PetscCall(PetscMalloc1((cEnd - cStart) * nodesPerCell[i], &patch->cellNodeMap[i]));
    for (j = 0; j < (cEnd - cStart) * nodesPerCell[i]; ++j) patch->cellNodeMap[i][j] = cellNodeMap[i][j];
    patch->subspaceOffsets[i] = subspaceOffsets[i];
  }
  PetscCall(PCPatchCreateDefaultSF_Private(pc, nsubspaces, sfs, patch->bs));
  PetscCall(PetscFree(sfs));

  patch->subspaceOffsets[nsubspaces] = subspaceOffsets[nsubspaces];
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, numGhostBcs, ghostBcNodes, PETSC_COPY_VALUES, &patch->ghostBcNodes));
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, numGlobalBcs, globalBcNodes, PETSC_COPY_VALUES, &patch->globalBcNodes));
  PetscCall(DMDestroy(&dm));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* TODO: Docs */
static PetscErrorCode PCPatchSetDiscretisationInfoCombined(PC pc, DM dm, PetscInt *nodesPerCell, const PetscInt **cellNodeMap, PetscInt numGhostBcs, const PetscInt *ghostBcNodes, PetscInt numGlobalBcs, const PetscInt *globalBcNodes)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;
  PetscInt  cStart, cEnd, i, j;

  PetscFunctionBegin;
  patch->combined = PETSC_TRUE;
  PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd));
  PetscCall(DMGetNumFields(dm, &patch->nsubspaces));
  PetscCall(PetscCalloc1(patch->nsubspaces, &patch->dofSection));
  PetscCall(PetscMalloc1(patch->nsubspaces, &patch->bs));
  PetscCall(PetscMalloc1(patch->nsubspaces, &patch->nodesPerCell));
  PetscCall(PetscMalloc1(patch->nsubspaces, &patch->cellNodeMap));
  PetscCall(PetscCalloc1(patch->nsubspaces + 1, &patch->subspaceOffsets));
  PetscCall(DMGetLocalSection(dm, &patch->dofSection[0]));
  PetscCall(PetscObjectReference((PetscObject)patch->dofSection[0]));
  PetscCall(PetscSectionGetStorageSize(patch->dofSection[0], &patch->subspaceOffsets[patch->nsubspaces]));
  patch->totalDofsPerCell = 0;
  for (i = 0; i < patch->nsubspaces; ++i) {
    patch->bs[i]           = 1;
    patch->nodesPerCell[i] = nodesPerCell[i];
    patch->totalDofsPerCell += nodesPerCell[i];
    PetscCall(PetscMalloc1((cEnd - cStart) * nodesPerCell[i], &patch->cellNodeMap[i]));
    for (j = 0; j < (cEnd - cStart) * nodesPerCell[i]; ++j) patch->cellNodeMap[i][j] = cellNodeMap[i][j];
  }
  PetscCall(DMGetSectionSF(dm, &patch->sectionSF));
  PetscCall(PetscObjectReference((PetscObject)patch->sectionSF));
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, numGhostBcs, ghostBcNodes, PETSC_COPY_VALUES, &patch->ghostBcNodes));
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, numGlobalBcs, globalBcNodes, PETSC_COPY_VALUES, &patch->globalBcNodes));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PCPatchSetComputeFunction - Set the callback function used to compute patch residuals

  Logically Collective

  Input Parameters:
+ pc   - The `PC`
. func - The callback function
- ctx  - The user context

  Calling sequence of `func`:
+ pc               - The `PC`
. point            - The point
. x                - The input solution (not used in linear problems)
. f                - The patch residual vector
. cellIS           - An array of the cell numbers
. n                - The size of `dofsArray`
. dofsArray        - The dofmap for the dofs to be solved for
. dofsArrayWithAll - The dofmap for all dofs on the patch
- ctx              - The user context

  Level: advanced

  Note:
  The entries of `f` (the output residual vector) have been set to zero before the call.

.seealso: [](ch_ksp), `PCPatchSetComputeOperator()`, `PCPatchGetComputeOperator()`, `PCPatchSetDiscretisationInfo()`, `PCPatchSetComputeFunctionInteriorFacets()`
@*/
PetscErrorCode PCPatchSetComputeFunction(PC pc, PetscErrorCode (*func)(PC pc, PetscInt point, Vec x, Vec f, IS cellIS, PetscInt n, const PetscInt *dofsArray, const PetscInt *dofsArrayWithAll, void *ctx), void *ctx)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;

  PetscFunctionBegin;
  patch->usercomputef    = func;
  patch->usercomputefctx = ctx;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PCPatchSetComputeFunctionInteriorFacets - Set the callback function used to compute facet integrals for patch residuals

  Logically Collective

  Input Parameters:
+ pc   - The `PC`
. func - The callback function
- ctx  - The user context

  Calling sequence of `func`:
+ pc               - The `PC`
. point            - The point
. x                - The input solution (not used in linear problems)
. f                - The patch residual vector
. facetIS          - An array of the facet numbers
. n                - The size of `dofsArray`
. dofsArray        - The dofmap for the dofs to be solved for
. dofsArrayWithAll - The dofmap for all dofs on the patch
- ctx              - The user context

  Level: advanced

  Note:
  The entries of `f` (the output residual vector) have been set to zero before the call.

.seealso: [](ch_ksp), `PCPatchSetComputeOperator()`, `PCPatchGetComputeOperator()`, `PCPatchSetDiscretisationInfo()`, `PCPatchSetComputeFunction()`
@*/
PetscErrorCode PCPatchSetComputeFunctionInteriorFacets(PC pc, PetscErrorCode (*func)(PC pc, PetscInt point, Vec x, Vec f, IS facetIS, PetscInt n, const PetscInt *dofsArray, const PetscInt *dofsArrayWithAll, void *ctx), void *ctx)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;

  PetscFunctionBegin;
  patch->usercomputefintfacet    = func;
  patch->usercomputefintfacetctx = ctx;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PCPatchSetComputeOperator - Set the callback function used to compute patch matrices

  Logically Collective

  Input Parameters:
+ pc   - The `PC`
. func - The callback function
- ctx  - The user context

  Calling sequence of `func`:
+ pc               - The `PC`
. point            - The point
. x                - The input solution (not used in linear problems)
. mat              - The patch matrix
. facetIS          - An array of the cell numbers
. n                - The size of `dofsArray`
. dofsArray        - The dofmap for the dofs to be solved for
. dofsArrayWithAll - The dofmap for all dofs on the patch
- ctx              - The user context

  Level: advanced

  Note:
  The matrix entries have been set to zero before the call.

.seealso: [](ch_ksp), `PCPatchGetComputeOperator()`, `PCPatchSetComputeFunction()`, `PCPatchSetDiscretisationInfo()`
@*/
PetscErrorCode PCPatchSetComputeOperator(PC pc, PetscErrorCode (*func)(PC pc, PetscInt point, Vec x, Mat mat, IS facetIS, PetscInt n, const PetscInt *dofsArray, const PetscInt *dofsArrayWithAll, void *ctx), void *ctx)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;

  PetscFunctionBegin;
  patch->usercomputeop    = func;
  patch->usercomputeopctx = ctx;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PCPatchSetComputeOperatorInteriorFacets - Set the callback function used to compute facet integrals for patch matrices

  Logically Collective

  Input Parameters:
+ pc   - The `PC`
. func - The callback function
- ctx  - The user context

  Calling sequence of `func`:
+ pc               - The `PC`
. point            - The point
. x                - The input solution (not used in linear problems)
. mat              - The patch matrix
. facetIS          - An array of the facet numbers
. n                - The size of `dofsArray`
. dofsArray        - The dofmap for the dofs to be solved for
. dofsArrayWithAll - The dofmap for all dofs on the patch
- ctx              - The user context

  Level: advanced

  Note:
  The matrix entries have been set to zero before the call.

.seealso: [](ch_ksp), `PCPatchGetComputeOperator()`, `PCPatchSetComputeFunction()`, `PCPatchSetDiscretisationInfo()`
@*/
PetscErrorCode PCPatchSetComputeOperatorInteriorFacets(PC pc, PetscErrorCode (*func)(PC pc, PetscInt point, Vec x, Mat mat, IS facetIS, PetscInt n, const PetscInt *dofsArray, const PetscInt *dofsArrayWithAll, void *ctx), void *ctx)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;

  PetscFunctionBegin;
  patch->usercomputeopintfacet    = func;
  patch->usercomputeopintfacetctx = ctx;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* On entry, ht contains the topological entities whose dofs we are responsible for solving for;
   on exit, cht contains all the topological entities we need to compute their residuals.
   In full generality this should incorporate knowledge of the sparsity pattern of the matrix;
   here we assume a standard FE sparsity pattern.*/
/* TODO: Use DMPlexGetAdjacency() */
static PetscErrorCode PCPatchCompleteCellPatch(PC pc, PetscHSetI ht, PetscHSetI cht)
{
  DM            dm, plex;
  PC_PATCH     *patch = (PC_PATCH *)pc->data;
  PetscHashIter hi;
  PetscInt      point;
  PetscInt     *star = NULL, *closure = NULL;
  PetscInt      ignoredim, iStart = 0, iEnd = -1, starSize, closureSize, si, ci;
  PetscInt     *fStar = NULL, *fClosure = NULL;
  PetscInt      fBegin, fEnd, fsi, fci, fStarSize, fClosureSize;

  PetscFunctionBegin;
  PetscCall(PCGetDM(pc, &dm));
  PetscCall(DMConvert(dm, DMPLEX, &plex));
  dm = plex;
  PetscCall(DMPlexGetHeightStratum(dm, 1, &fBegin, &fEnd));
  PetscCall(PCPatchGetIgnoreDim(pc, &ignoredim));
  if (ignoredim >= 0) PetscCall(DMPlexGetDepthStratum(dm, ignoredim, &iStart, &iEnd));
  PetscCall(PetscHSetIClear(cht));
  PetscHashIterBegin(ht, hi);
  while (!PetscHashIterAtEnd(ht, hi)) {
    PetscHashIterGetKey(ht, hi, point);
    PetscHashIterNext(ht, hi);

    /* Loop over all the cells that this point connects to */
    PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_FALSE, &starSize, &star));
    for (si = 0; si < starSize * 2; si += 2) {
      const PetscInt ownedpoint = star[si];
      /* TODO Check for point in cht before running through closure again */
      /* now loop over all entities in the closure of that cell */
      PetscCall(DMPlexGetTransitiveClosure(dm, ownedpoint, PETSC_TRUE, &closureSize, &closure));
      for (ci = 0; ci < closureSize * 2; ci += 2) {
        const PetscInt seenpoint = closure[ci];
        if (ignoredim >= 0 && seenpoint >= iStart && seenpoint < iEnd) continue;
        PetscCall(PetscHSetIAdd(cht, seenpoint));
        /* Facet integrals couple dofs across facets, so in that case for each of
          the facets we need to add all dofs on the other side of the facet to
          the seen dofs. */
        if (patch->usercomputeopintfacet) {
          if (fBegin <= seenpoint && seenpoint < fEnd) {
            PetscCall(DMPlexGetTransitiveClosure(dm, seenpoint, PETSC_FALSE, &fStarSize, &fStar));
            for (fsi = 0; fsi < fStarSize * 2; fsi += 2) {
              PetscCall(DMPlexGetTransitiveClosure(dm, fStar[fsi], PETSC_TRUE, &fClosureSize, &fClosure));
              for (fci = 0; fci < fClosureSize * 2; fci += 2) PetscCall(PetscHSetIAdd(cht, fClosure[fci]));
              PetscCall(DMPlexRestoreTransitiveClosure(dm, fStar[fsi], PETSC_TRUE, NULL, &fClosure));
            }
            PetscCall(DMPlexRestoreTransitiveClosure(dm, seenpoint, PETSC_FALSE, NULL, &fStar));
          }
        }
      }
      PetscCall(DMPlexRestoreTransitiveClosure(dm, ownedpoint, PETSC_TRUE, NULL, &closure));
    }
    PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_FALSE, NULL, &star));
  }
  PetscCall(DMDestroy(&dm));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCPatchGetGlobalDofs(PC pc, PetscSection dofSection[], PetscInt f, PetscBool combined, PetscInt p, PetscInt *dof, PetscInt *off)
{
  PetscFunctionBegin;
  if (combined) {
    if (f < 0) {
      if (dof) PetscCall(PetscSectionGetDof(dofSection[0], p, dof));
      if (off) PetscCall(PetscSectionGetOffset(dofSection[0], p, off));
    } else {
      if (dof) PetscCall(PetscSectionGetFieldDof(dofSection[0], p, f, dof));
      if (off) PetscCall(PetscSectionGetFieldOffset(dofSection[0], p, f, off));
    }
  } else {
    if (f < 0) {
      PC_PATCH *patch = (PC_PATCH *)pc->data;
      PetscInt  fdof, g;

      if (dof) {
        *dof = 0;
        for (g = 0; g < patch->nsubspaces; ++g) {
          PetscCall(PetscSectionGetDof(dofSection[g], p, &fdof));
          *dof += fdof;
        }
      }
      if (off) {
        *off = 0;
        for (g = 0; g < patch->nsubspaces; ++g) {
          PetscCall(PetscSectionGetOffset(dofSection[g], p, &fdof));
          *off += fdof;
        }
      }
    } else {
      if (dof) PetscCall(PetscSectionGetDof(dofSection[f], p, dof));
      if (off) PetscCall(PetscSectionGetOffset(dofSection[f], p, off));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Given a hash table with a set of topological entities (pts), compute the degrees of
   freedom in global concatenated numbering on those entities.
   For Vanka smoothing, this needs to do something special: ignore dofs of the
   constraint subspace on entities that aren't the base entity we're building the patch
   around. */
static PetscErrorCode PCPatchGetPointDofs(PC pc, PetscHSetI pts, PetscHSetI dofs, PetscInt base, PetscHSetI *subspaces_to_exclude)
{
  PC_PATCH     *patch = (PC_PATCH *)pc->data;
  PetscHashIter hi;
  PetscInt      ldof, loff;
  PetscInt      k, p;

  PetscFunctionBegin;
  PetscCall(PetscHSetIClear(dofs));
  for (k = 0; k < patch->nsubspaces; ++k) {
    PetscInt subspaceOffset = patch->subspaceOffsets[k];
    PetscInt bs             = patch->bs[k];
    PetscInt j, l;

    if (subspaces_to_exclude != NULL) {
      PetscBool should_exclude_k = PETSC_FALSE;
      PetscCall(PetscHSetIHas(*subspaces_to_exclude, k, &should_exclude_k));
      if (should_exclude_k) {
        /* only get this subspace dofs at the base entity, not any others */
        PetscCall(PCPatchGetGlobalDofs(pc, patch->dofSection, k, patch->combined, base, &ldof, &loff));
        if (0 == ldof) continue;
        for (j = loff; j < ldof + loff; ++j) {
          for (l = 0; l < bs; ++l) {
            PetscInt dof = bs * j + l + subspaceOffset;
            PetscCall(PetscHSetIAdd(dofs, dof));
          }
        }
        continue; /* skip the other dofs of this subspace */
      }
    }

    PetscHashIterBegin(pts, hi);
    while (!PetscHashIterAtEnd(pts, hi)) {
      PetscHashIterGetKey(pts, hi, p);
      PetscHashIterNext(pts, hi);
      PetscCall(PCPatchGetGlobalDofs(pc, patch->dofSection, k, patch->combined, p, &ldof, &loff));
      if (0 == ldof) continue;
      for (j = loff; j < ldof + loff; ++j) {
        for (l = 0; l < bs; ++l) {
          PetscInt dof = bs * j + l + subspaceOffset;
          PetscCall(PetscHSetIAdd(dofs, dof));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Given two hash tables A and B, compute the keys in B that are not in A, and put them in C */
static PetscErrorCode PCPatchComputeSetDifference_Private(PetscHSetI A, PetscHSetI B, PetscHSetI C)
{
  PetscHashIter hi;
  PetscInt      key;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscCall(PetscHSetIClear(C));
  PetscHashIterBegin(B, hi);
  while (!PetscHashIterAtEnd(B, hi)) {
    PetscHashIterGetKey(B, hi, key);
    PetscHashIterNext(B, hi);
    PetscCall(PetscHSetIHas(A, key, &flg));
    if (!flg) PetscCall(PetscHSetIAdd(C, key));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

// PetscClangLinter pragma disable: -fdoc-sowing-chars
/*
  PCPatchCreateCellPatches - create patches.

  Input Parameter:
  . dm - The DMPlex object defining the mesh

  Output Parameters:
  + cellCounts  - Section with counts of cells around each vertex
  . cells       - IS of the cell point indices of cells in each patch
  . pointCounts - Section with counts of cells around each vertex
  - point       - IS of the cell point indices of cells in each patch
 */
static PetscErrorCode PCPatchCreateCellPatches(PC pc)
{
  PC_PATCH       *patch = (PC_PATCH *)pc->data;
  DMLabel         ghost = NULL;
  DM              dm, plex;
  PetscHSetI      ht = NULL, cht = NULL;
  PetscSection    cellCounts, pointCounts, intFacetCounts, extFacetCounts;
  PetscInt       *cellsArray, *pointsArray, *intFacetsArray, *extFacetsArray, *intFacetsToPatchCell;
  PetscInt        numCells, numPoints, numIntFacets, numExtFacets;
  const PetscInt *leaves;
  PetscInt        nleaves, pStart, pEnd, cStart, cEnd, vStart, vEnd, fStart, fEnd, v;
  PetscBool       isFiredrake;

  PetscFunctionBegin;
  /* Used to keep track of the cells in the patch. */
  PetscCall(PetscHSetICreate(&ht));
  PetscCall(PetscHSetICreate(&cht));

  PetscCall(PCGetDM(pc, &dm));
  PetscCheck(dm, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "DM not yet set on patch PC");
  PetscCall(DMConvert(dm, DMPLEX, &plex));
  dm = plex;
  PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
  PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd));

  if (patch->user_patches) {
    PetscCall(patch->userpatchconstructionop(pc, &patch->npatch, &patch->userIS, &patch->iterationSet, patch->userpatchconstructctx));
    vStart = 0;
    vEnd   = patch->npatch;
  } else if (patch->ctype == PC_PATCH_PARDECOMP) {
    vStart = 0;
    vEnd   = 1;
  } else if (patch->codim < 0) {
    if (patch->dim < 0) PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
    else PetscCall(DMPlexGetDepthStratum(dm, patch->dim, &vStart, &vEnd));
  } else PetscCall(DMPlexGetHeightStratum(dm, patch->codim, &vStart, &vEnd));
  patch->npatch = vEnd - vStart;

  /* These labels mark the owned points.  We only create patches around points that this process owns. */
  PetscCall(DMHasLabel(dm, "pyop2_ghost", &isFiredrake));
  if (isFiredrake) {
    PetscCall(DMGetLabel(dm, "pyop2_ghost", &ghost));
    PetscCall(DMLabelCreateIndex(ghost, pStart, pEnd));
  } else {
    PetscSF sf;

    PetscCall(DMGetPointSF(dm, &sf));
    PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &leaves, NULL));
    nleaves = PetscMax(nleaves, 0);
  }

  PetscCall(PetscSectionCreate(PETSC_COMM_SELF, &patch->cellCounts));
  PetscCall(PetscObjectSetName((PetscObject)patch->cellCounts, "Patch Cell Layout"));
  cellCounts = patch->cellCounts;
  PetscCall(PetscSectionSetChart(cellCounts, vStart, vEnd));
  PetscCall(PetscSectionCreate(PETSC_COMM_SELF, &patch->pointCounts));
  PetscCall(PetscObjectSetName((PetscObject)patch->pointCounts, "Patch Point Layout"));
  pointCounts = patch->pointCounts;
  PetscCall(PetscSectionSetChart(pointCounts, vStart, vEnd));
  PetscCall(PetscSectionCreate(PETSC_COMM_SELF, &patch->extFacetCounts));
  PetscCall(PetscObjectSetName((PetscObject)patch->extFacetCounts, "Patch Exterior Facet Layout"));
  extFacetCounts = patch->extFacetCounts;
  PetscCall(PetscSectionSetChart(extFacetCounts, vStart, vEnd));
  PetscCall(PetscSectionCreate(PETSC_COMM_SELF, &patch->intFacetCounts));
  PetscCall(PetscObjectSetName((PetscObject)patch->intFacetCounts, "Patch Interior Facet Layout"));
  intFacetCounts = patch->intFacetCounts;
  PetscCall(PetscSectionSetChart(intFacetCounts, vStart, vEnd));
  /* Count cells and points in the patch surrounding each entity */
  PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
  for (v = vStart; v < vEnd; ++v) {
    PetscHashIter hi;
    PetscInt      chtSize, loc = -1;
    PetscBool     flg;

    if (!patch->user_patches && patch->ctype != PC_PATCH_PARDECOMP) {
      if (ghost) PetscCall(DMLabelHasPoint(ghost, v, &flg));
      else {
        PetscCall(PetscFindInt(v, nleaves, leaves, &loc));
        flg = loc >= 0 ? PETSC_TRUE : PETSC_FALSE;
      }
      /* Not an owned entity, don't make a cell patch. */
      if (flg) continue;
    }

    PetscCall(patch->patchconstructop((void *)patch, dm, v, ht));
    PetscCall(PCPatchCompleteCellPatch(pc, ht, cht));
    PetscCall(PetscHSetIGetSize(cht, &chtSize));
    /* empty patch, continue */
    if (chtSize == 0) continue;

    /* safe because size(cht) > 0 from above */
    PetscHashIterBegin(cht, hi);
    while (!PetscHashIterAtEnd(cht, hi)) {
      PetscInt point, pdof;

      PetscHashIterGetKey(cht, hi, point);
      if (fStart <= point && point < fEnd) {
        const PetscInt *support;
        PetscInt        supportSize, p;
        PetscBool       interior = PETSC_TRUE;
        PetscCall(DMPlexGetSupport(dm, point, &support));
        PetscCall(DMPlexGetSupportSize(dm, point, &supportSize));
        if (supportSize == 1) {
          interior = PETSC_FALSE;
        } else {
          for (p = 0; p < supportSize; p++) {
            PetscBool found;
            /* FIXME: can I do this while iterating over cht? */
            PetscCall(PetscHSetIHas(cht, support[p], &found));
            if (!found) {
              interior = PETSC_FALSE;
              break;
            }
          }
        }
        if (interior) {
          PetscCall(PetscSectionAddDof(intFacetCounts, v, 1));
        } else {
          PetscCall(PetscSectionAddDof(extFacetCounts, v, 1));
        }
      }
      PetscCall(PCPatchGetGlobalDofs(pc, patch->dofSection, -1, patch->combined, point, &pdof, NULL));
      if (pdof) PetscCall(PetscSectionAddDof(pointCounts, v, 1));
      if (point >= cStart && point < cEnd) PetscCall(PetscSectionAddDof(cellCounts, v, 1));
      PetscHashIterNext(cht, hi);
    }
  }
  if (isFiredrake) PetscCall(DMLabelDestroyIndex(ghost));

  PetscCall(PetscSectionSetUp(cellCounts));
  PetscCall(PetscSectionGetStorageSize(cellCounts, &numCells));
  PetscCall(PetscMalloc1(numCells, &cellsArray));
  PetscCall(PetscSectionSetUp(pointCounts));
  PetscCall(PetscSectionGetStorageSize(pointCounts, &numPoints));
  PetscCall(PetscMalloc1(numPoints, &pointsArray));

  PetscCall(PetscSectionSetUp(intFacetCounts));
  PetscCall(PetscSectionSetUp(extFacetCounts));
  PetscCall(PetscSectionGetStorageSize(intFacetCounts, &numIntFacets));
  PetscCall(PetscSectionGetStorageSize(extFacetCounts, &numExtFacets));
  PetscCall(PetscMalloc1(numIntFacets, &intFacetsArray));
  PetscCall(PetscMalloc1(numIntFacets * 2, &intFacetsToPatchCell));
  PetscCall(PetscMalloc1(numExtFacets, &extFacetsArray));

  /* Now that we know how much space we need, run through again and actually remember the cells. */
  for (v = vStart; v < vEnd; v++) {
    PetscHashIter hi;
    PetscInt      dof, off, cdof, coff, efdof, efoff, ifdof, ifoff, pdof, n = 0, cn = 0, ifn = 0, efn = 0;

    PetscCall(PetscSectionGetDof(pointCounts, v, &dof));
    PetscCall(PetscSectionGetOffset(pointCounts, v, &off));
    PetscCall(PetscSectionGetDof(cellCounts, v, &cdof));
    PetscCall(PetscSectionGetOffset(cellCounts, v, &coff));
    PetscCall(PetscSectionGetDof(intFacetCounts, v, &ifdof));
    PetscCall(PetscSectionGetOffset(intFacetCounts, v, &ifoff));
    PetscCall(PetscSectionGetDof(extFacetCounts, v, &efdof));
    PetscCall(PetscSectionGetOffset(extFacetCounts, v, &efoff));
    if (dof <= 0) continue;
    PetscCall(patch->patchconstructop((void *)patch, dm, v, ht));
    PetscCall(PCPatchCompleteCellPatch(pc, ht, cht));
    PetscHashIterBegin(cht, hi);
    while (!PetscHashIterAtEnd(cht, hi)) {
      PetscInt point;

      PetscHashIterGetKey(cht, hi, point);
      if (fStart <= point && point < fEnd) {
        const PetscInt *support;
        PetscInt        supportSize, p;
        PetscBool       interior = PETSC_TRUE;
        PetscCall(DMPlexGetSupport(dm, point, &support));
        PetscCall(DMPlexGetSupportSize(dm, point, &supportSize));
        if (supportSize == 1) {
          interior = PETSC_FALSE;
        } else {
          for (p = 0; p < supportSize; p++) {
            PetscBool found;
            /* FIXME: can I do this while iterating over cht? */
            PetscCall(PetscHSetIHas(cht, support[p], &found));
            if (!found) {
              interior = PETSC_FALSE;
              break;
            }
          }
        }
        if (interior) {
          intFacetsToPatchCell[2 * (ifoff + ifn)]     = support[0];
          intFacetsToPatchCell[2 * (ifoff + ifn) + 1] = support[1];
          intFacetsArray[ifoff + ifn++]               = point;
        } else {
          extFacetsArray[efoff + efn++] = point;
        }
      }
      PetscCall(PCPatchGetGlobalDofs(pc, patch->dofSection, -1, patch->combined, point, &pdof, NULL));
      if (pdof) pointsArray[off + n++] = point;
      if (point >= cStart && point < cEnd) cellsArray[coff + cn++] = point;
      PetscHashIterNext(cht, hi);
    }
    PetscCheck(ifn == ifdof, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of interior facets in patch %" PetscInt_FMT " is %" PetscInt_FMT ", but should be %" PetscInt_FMT, v, ifn, ifdof);
    PetscCheck(efn == efdof, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of exterior facets in patch %" PetscInt_FMT " is %" PetscInt_FMT ", but should be %" PetscInt_FMT, v, efn, efdof);
    PetscCheck(cn == cdof, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of cells in patch %" PetscInt_FMT " is %" PetscInt_FMT ", but should be %" PetscInt_FMT, v, cn, cdof);
    PetscCheck(n == dof, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of points in patch %" PetscInt_FMT " is %" PetscInt_FMT ", but should be %" PetscInt_FMT, v, n, dof);

    for (ifn = 0; ifn < ifdof; ifn++) {
      PetscInt  cell0  = intFacetsToPatchCell[2 * (ifoff + ifn)];
      PetscInt  cell1  = intFacetsToPatchCell[2 * (ifoff + ifn) + 1];
      PetscBool found0 = PETSC_FALSE, found1 = PETSC_FALSE;
      for (n = 0; n < cdof; n++) {
        if (!found0 && cell0 == cellsArray[coff + n]) {
          intFacetsToPatchCell[2 * (ifoff + ifn)] = n;
          found0                                  = PETSC_TRUE;
        }
        if (!found1 && cell1 == cellsArray[coff + n]) {
          intFacetsToPatchCell[2 * (ifoff + ifn) + 1] = n;
          found1                                      = PETSC_TRUE;
        }
        if (found0 && found1) break;
      }
      PetscCheck(found0 && found1, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Didn't manage to find local point numbers for facet support");
    }
  }
  PetscCall(PetscHSetIDestroy(&ht));
  PetscCall(PetscHSetIDestroy(&cht));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, numCells, cellsArray, PETSC_OWN_POINTER, &patch->cells));
  PetscCall(PetscObjectSetName((PetscObject)patch->cells, "Patch Cells"));
  if (patch->viewCells) {
    PetscCall(ObjectView((PetscObject)patch->cellCounts, patch->viewerCells, patch->formatCells));
    PetscCall(ObjectView((PetscObject)patch->cells, patch->viewerCells, patch->formatCells));
  }
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, numIntFacets, intFacetsArray, PETSC_OWN_POINTER, &patch->intFacets));
  PetscCall(PetscObjectSetName((PetscObject)patch->intFacets, "Patch Interior Facets"));
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2 * numIntFacets, intFacetsToPatchCell, PETSC_OWN_POINTER, &patch->intFacetsToPatchCell));
  PetscCall(PetscObjectSetName((PetscObject)patch->intFacetsToPatchCell, "Patch Interior Facets local support"));
  if (patch->viewIntFacets) {
    PetscCall(ObjectView((PetscObject)patch->intFacetCounts, patch->viewerIntFacets, patch->formatIntFacets));
    PetscCall(ObjectView((PetscObject)patch->intFacets, patch->viewerIntFacets, patch->formatIntFacets));
    PetscCall(ObjectView((PetscObject)patch->intFacetsToPatchCell, patch->viewerIntFacets, patch->formatIntFacets));
  }
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, numExtFacets, extFacetsArray, PETSC_OWN_POINTER, &patch->extFacets));
  PetscCall(PetscObjectSetName((PetscObject)patch->extFacets, "Patch Exterior Facets"));
  if (patch->viewExtFacets) {
    PetscCall(ObjectView((PetscObject)patch->extFacetCounts, patch->viewerExtFacets, patch->formatExtFacets));
    PetscCall(ObjectView((PetscObject)patch->extFacets, patch->viewerExtFacets, patch->formatExtFacets));
  }
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, numPoints, pointsArray, PETSC_OWN_POINTER, &patch->points));
  PetscCall(PetscObjectSetName((PetscObject)patch->points, "Patch Points"));
  if (patch->viewPoints) {
    PetscCall(ObjectView((PetscObject)patch->pointCounts, patch->viewerPoints, patch->formatPoints));
    PetscCall(ObjectView((PetscObject)patch->points, patch->viewerPoints, patch->formatPoints));
  }
  PetscCall(DMDestroy(&dm));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  PCPatchCreateCellPatchDiscretisationInfo - Build the dof maps for cell patches

  Input Parameters:
  + dm - The DMPlex object defining the mesh
  . cellCounts - Section with counts of cells around each vertex
  . cells - IS of the cell point indices of cells in each patch
  . cellNumbering - Section mapping plex cell points to Firedrake cell indices.
  . nodesPerCell - number of nodes per cell.
  - cellNodeMap - map from cells to node indices (nodesPerCell * numCells)

  Output Parameters:
  + dofs - IS of local dof numbers of each cell in the patch, where local is a patch local numbering
  . gtolCounts - Section with counts of dofs per cell patch
  - gtol - IS mapping from global dofs to local dofs for each patch.
 */
static PetscErrorCode PCPatchCreateCellPatchDiscretisationInfo(PC pc)
{
  PC_PATCH       *patch       = (PC_PATCH *)pc->data;
  PetscSection    cellCounts  = patch->cellCounts;
  PetscSection    pointCounts = patch->pointCounts;
  PetscSection    gtolCounts, gtolCountsWithArtificial = NULL, gtolCountsWithAll = NULL;
  IS              cells         = patch->cells;
  IS              points        = patch->points;
  PetscSection    cellNumbering = patch->cellNumbering;
  PetscInt        Nf            = patch->nsubspaces;
  PetscInt        numCells, numPoints;
  PetscInt        numDofs;
  PetscInt        numGlobalDofs, numGlobalDofsWithArtificial, numGlobalDofsWithAll;
  PetscInt        totalDofsPerCell = patch->totalDofsPerCell;
  PetscInt        vStart, vEnd, v;
  const PetscInt *cellsArray, *pointsArray;
  PetscInt       *newCellsArray                 = NULL;
  PetscInt       *dofsArray                     = NULL;
  PetscInt       *dofsArrayWithArtificial       = NULL;
  PetscInt       *dofsArrayWithAll              = NULL;
  PetscInt       *offsArray                     = NULL;
  PetscInt       *offsArrayWithArtificial       = NULL;
  PetscInt       *offsArrayWithAll              = NULL;
  PetscInt       *asmArray                      = NULL;
  PetscInt       *asmArrayWithArtificial        = NULL;
  PetscInt       *asmArrayWithAll               = NULL;
  PetscInt       *globalDofsArray               = NULL;
  PetscInt       *globalDofsArrayWithArtificial = NULL;
  PetscInt       *globalDofsArrayWithAll        = NULL;
  PetscInt        globalIndex                   = 0;
  PetscInt        key                           = 0;
  PetscInt        asmKey                        = 0;
  DM              dm                            = NULL, plex;
  const PetscInt *bcNodes                       = NULL;
  PetscHMapI      ht;
  PetscHMapI      htWithArtificial;
  PetscHMapI      htWithAll;
  PetscHSetI      globalBcs;
  PetscInt        numBcs;
  PetscHSetI      ownedpts, seenpts, owneddofs, seendofs, artificialbcs;
  PetscInt        pStart, pEnd, p, i;
  char            option[PETSC_MAX_PATH_LEN];
  PetscBool       isNonlinear;

  PetscFunctionBegin;
  PetscCall(PCGetDM(pc, &dm));
  PetscCall(DMConvert(dm, DMPLEX, &plex));
  dm = plex;
  /* dofcounts section is cellcounts section * dofPerCell */
  PetscCall(PetscSectionGetStorageSize(cellCounts, &numCells));
  PetscCall(PetscSectionGetStorageSize(patch->pointCounts, &numPoints));
  numDofs = numCells * totalDofsPerCell;
  PetscCall(PetscMalloc1(numDofs, &dofsArray));
  PetscCall(PetscMalloc1(numPoints * Nf, &offsArray));
  PetscCall(PetscMalloc1(numDofs, &asmArray));
  PetscCall(PetscMalloc1(numCells, &newCellsArray));
  PetscCall(PetscSectionGetChart(cellCounts, &vStart, &vEnd));
  PetscCall(PetscSectionCreate(PETSC_COMM_SELF, &patch->gtolCounts));
  gtolCounts = patch->gtolCounts;
  PetscCall(PetscSectionSetChart(gtolCounts, vStart, vEnd));
  PetscCall(PetscObjectSetName((PetscObject)patch->gtolCounts, "Patch Global Index Section"));

  if (patch->local_composition_type == PC_COMPOSITE_MULTIPLICATIVE) {
    PetscCall(PetscMalloc1(numPoints * Nf, &offsArrayWithArtificial));
    PetscCall(PetscMalloc1(numDofs, &asmArrayWithArtificial));
    PetscCall(PetscMalloc1(numDofs, &dofsArrayWithArtificial));
    PetscCall(PetscSectionCreate(PETSC_COMM_SELF, &patch->gtolCountsWithArtificial));
    gtolCountsWithArtificial = patch->gtolCountsWithArtificial;
    PetscCall(PetscSectionSetChart(gtolCountsWithArtificial, vStart, vEnd));
    PetscCall(PetscObjectSetName((PetscObject)patch->gtolCountsWithArtificial, "Patch Global Index Section Including Artificial BCs"));
  }

  isNonlinear = patch->isNonlinear;
  if (isNonlinear) {
    PetscCall(PetscMalloc1(numPoints * Nf, &offsArrayWithAll));
    PetscCall(PetscMalloc1(numDofs, &asmArrayWithAll));
    PetscCall(PetscMalloc1(numDofs, &dofsArrayWithAll));
    PetscCall(PetscSectionCreate(PETSC_COMM_SELF, &patch->gtolCountsWithAll));
    gtolCountsWithAll = patch->gtolCountsWithAll;
    PetscCall(PetscSectionSetChart(gtolCountsWithAll, vStart, vEnd));
    PetscCall(PetscObjectSetName((PetscObject)patch->gtolCountsWithAll, "Patch Global Index Section Including All BCs"));
  }

  /* Outside the patch loop, get the dofs that are globally-enforced Dirichlet
   conditions */
  PetscCall(PetscHSetICreate(&globalBcs));
  PetscCall(ISGetIndices(patch->ghostBcNodes, &bcNodes));
  PetscCall(ISGetSize(patch->ghostBcNodes, &numBcs));
  for (i = 0; i < numBcs; ++i) PetscCall(PetscHSetIAdd(globalBcs, bcNodes[i])); /* these are already in concatenated numbering */
  PetscCall(ISRestoreIndices(patch->ghostBcNodes, &bcNodes));
  PetscCall(ISDestroy(&patch->ghostBcNodes)); /* memory optimisation */

  /* Hash tables for artificial BC construction */
  PetscCall(PetscHSetICreate(&ownedpts));
  PetscCall(PetscHSetICreate(&seenpts));
  PetscCall(PetscHSetICreate(&owneddofs));
  PetscCall(PetscHSetICreate(&seendofs));
  PetscCall(PetscHSetICreate(&artificialbcs));

  PetscCall(ISGetIndices(cells, &cellsArray));
  PetscCall(ISGetIndices(points, &pointsArray));
  PetscCall(PetscHMapICreate(&ht));
  PetscCall(PetscHMapICreate(&htWithArtificial));
  PetscCall(PetscHMapICreate(&htWithAll));
  for (v = vStart; v < vEnd; ++v) {
    PetscInt localIndex               = 0;
    PetscInt localIndexWithArtificial = 0;
    PetscInt localIndexWithAll        = 0;
    PetscInt dof, off, i, j, k, l;

    PetscCall(PetscHMapIClear(ht));
    PetscCall(PetscHMapIClear(htWithArtificial));
    PetscCall(PetscHMapIClear(htWithAll));
    PetscCall(PetscSectionGetDof(cellCounts, v, &dof));
    PetscCall(PetscSectionGetOffset(cellCounts, v, &off));
    if (dof <= 0) continue;

    /* Calculate the global numbers of the artificial BC dofs here first */
    PetscCall(patch->patchconstructop((void *)patch, dm, v, ownedpts));
    PetscCall(PCPatchCompleteCellPatch(pc, ownedpts, seenpts));
    PetscCall(PCPatchGetPointDofs(pc, ownedpts, owneddofs, v, &patch->subspaces_to_exclude));
    PetscCall(PCPatchGetPointDofs(pc, seenpts, seendofs, v, NULL));
    PetscCall(PCPatchComputeSetDifference_Private(owneddofs, seendofs, artificialbcs));
    if (patch->viewPatches) {
      PetscHSetI    globalbcdofs;
      PetscHashIter hi;
      MPI_Comm      comm = PetscObjectComm((PetscObject)pc);

      PetscCall(PetscHSetICreate(&globalbcdofs));
      PetscCall(PetscSynchronizedPrintf(comm, "Patch %" PetscInt_FMT ": owned dofs:\n", v));
      PetscHashIterBegin(owneddofs, hi);
      while (!PetscHashIterAtEnd(owneddofs, hi)) {
        PetscInt globalDof;

        PetscHashIterGetKey(owneddofs, hi, globalDof);
        PetscHashIterNext(owneddofs, hi);
        PetscCall(PetscSynchronizedPrintf(comm, "%" PetscInt_FMT " ", globalDof));
      }
      PetscCall(PetscSynchronizedPrintf(comm, "\n"));
      PetscCall(PetscSynchronizedPrintf(comm, "Patch %" PetscInt_FMT ": seen dofs:\n", v));
      PetscHashIterBegin(seendofs, hi);
      while (!PetscHashIterAtEnd(seendofs, hi)) {
        PetscInt  globalDof;
        PetscBool flg;

        PetscHashIterGetKey(seendofs, hi, globalDof);
        PetscHashIterNext(seendofs, hi);
        PetscCall(PetscSynchronizedPrintf(comm, "%" PetscInt_FMT " ", globalDof));

        PetscCall(PetscHSetIHas(globalBcs, globalDof, &flg));
        if (flg) PetscCall(PetscHSetIAdd(globalbcdofs, globalDof));
      }
      PetscCall(PetscSynchronizedPrintf(comm, "\n"));
      PetscCall(PetscSynchronizedPrintf(comm, "Patch %" PetscInt_FMT ": global BCs:\n", v));
      PetscCall(PetscHSetIGetSize(globalbcdofs, &numBcs));
      if (numBcs > 0) {
        PetscHashIterBegin(globalbcdofs, hi);
        while (!PetscHashIterAtEnd(globalbcdofs, hi)) {
          PetscInt globalDof;
          PetscHashIterGetKey(globalbcdofs, hi, globalDof);
          PetscHashIterNext(globalbcdofs, hi);
          PetscCall(PetscSynchronizedPrintf(comm, "%" PetscInt_FMT " ", globalDof));
        }
      }
      PetscCall(PetscSynchronizedPrintf(comm, "\n"));
      PetscCall(PetscSynchronizedPrintf(comm, "Patch %" PetscInt_FMT ": artificial BCs:\n", v));
      PetscCall(PetscHSetIGetSize(artificialbcs, &numBcs));
      if (numBcs > 0) {
        PetscHashIterBegin(artificialbcs, hi);
        while (!PetscHashIterAtEnd(artificialbcs, hi)) {
          PetscInt globalDof;
          PetscHashIterGetKey(artificialbcs, hi, globalDof);
          PetscHashIterNext(artificialbcs, hi);
          PetscCall(PetscSynchronizedPrintf(comm, "%" PetscInt_FMT " ", globalDof));
        }
      }
      PetscCall(PetscSynchronizedPrintf(comm, "\n\n"));
      PetscCall(PetscHSetIDestroy(&globalbcdofs));
    }
    for (k = 0; k < patch->nsubspaces; ++k) {
      const PetscInt *cellNodeMap    = patch->cellNodeMap[k];
      PetscInt        nodesPerCell   = patch->nodesPerCell[k];
      PetscInt        subspaceOffset = patch->subspaceOffsets[k];
      PetscInt        bs             = patch->bs[k];

      for (i = off; i < off + dof; ++i) {
        /* Walk over the cells in this patch. */
        const PetscInt c    = cellsArray[i];
        PetscInt       cell = c;

        /* TODO Change this to an IS */
        if (cellNumbering) {
          PetscCall(PetscSectionGetDof(cellNumbering, c, &cell));
          PetscCheck(cell > 0, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_OUTOFRANGE, "Cell %" PetscInt_FMT " doesn't appear in cell numbering map", c);
          PetscCall(PetscSectionGetOffset(cellNumbering, c, &cell));
        }
        newCellsArray[i] = cell;
        for (j = 0; j < nodesPerCell; ++j) {
          /* For each global dof, map it into contiguous local storage. */
          const PetscInt globalDof = cellNodeMap[cell * nodesPerCell + j] * bs + subspaceOffset;
          /* finally, loop over block size */
          for (l = 0; l < bs; ++l) {
            PetscInt  localDof;
            PetscBool isGlobalBcDof, isArtificialBcDof;

            /* first, check if this is either a globally enforced or locally enforced BC dof */
            PetscCall(PetscHSetIHas(globalBcs, globalDof + l, &isGlobalBcDof));
            PetscCall(PetscHSetIHas(artificialbcs, globalDof + l, &isArtificialBcDof));

            /* if it's either, don't ever give it a local dof number */
            if (isGlobalBcDof || isArtificialBcDof) {
              dofsArray[globalIndex] = -1; /* don't use this in assembly in this patch */
            } else {
              PetscCall(PetscHMapIGet(ht, globalDof + l, &localDof));
              if (localDof == -1) {
                localDof = localIndex++;
                PetscCall(PetscHMapISet(ht, globalDof + l, localDof));
              }
              PetscCheck(globalIndex < numDofs, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Found more dofs %" PetscInt_FMT " than expected %" PetscInt_FMT, globalIndex + 1, numDofs);
              /* And store. */
              dofsArray[globalIndex] = localDof;
            }

            if (patch->local_composition_type == PC_COMPOSITE_MULTIPLICATIVE) {
              if (isGlobalBcDof) {
                dofsArrayWithArtificial[globalIndex] = -1; /* don't use this in assembly in this patch */
              } else {
                PetscCall(PetscHMapIGet(htWithArtificial, globalDof + l, &localDof));
                if (localDof == -1) {
                  localDof = localIndexWithArtificial++;
                  PetscCall(PetscHMapISet(htWithArtificial, globalDof + l, localDof));
                }
                PetscCheck(globalIndex < numDofs, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Found more dofs %" PetscInt_FMT " than expected %" PetscInt_FMT, globalIndex + 1, numDofs);
                /* And store.*/
                dofsArrayWithArtificial[globalIndex] = localDof;
              }
            }

            if (isNonlinear) {
              /* Build the dofmap for the function space with _all_ dofs,
   including those in any kind of boundary condition */
              PetscCall(PetscHMapIGet(htWithAll, globalDof + l, &localDof));
              if (localDof == -1) {
                localDof = localIndexWithAll++;
                PetscCall(PetscHMapISet(htWithAll, globalDof + l, localDof));
              }
              PetscCheck(globalIndex < numDofs, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Found more dofs %" PetscInt_FMT " than expected %" PetscInt_FMT, globalIndex + 1, numDofs);
              /* And store.*/
              dofsArrayWithAll[globalIndex] = localDof;
            }
            globalIndex++;
          }
        }
      }
    }
    /* How many local dofs in this patch? */
    if (patch->local_composition_type == PC_COMPOSITE_MULTIPLICATIVE) {
      PetscCall(PetscHMapIGetSize(htWithArtificial, &dof));
      PetscCall(PetscSectionSetDof(gtolCountsWithArtificial, v, dof));
    }
    if (isNonlinear) {
      PetscCall(PetscHMapIGetSize(htWithAll, &dof));
      PetscCall(PetscSectionSetDof(gtolCountsWithAll, v, dof));
    }
    PetscCall(PetscHMapIGetSize(ht, &dof));
    PetscCall(PetscSectionSetDof(gtolCounts, v, dof));
  }

  PetscCall(DMDestroy(&dm));
  PetscCheck(globalIndex == numDofs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Expected number of dofs (%" PetscInt_FMT ") doesn't match found number (%" PetscInt_FMT ")", numDofs, globalIndex);
  PetscCall(PetscSectionSetUp(gtolCounts));
  PetscCall(PetscSectionGetStorageSize(gtolCounts, &numGlobalDofs));
  PetscCall(PetscMalloc1(numGlobalDofs, &globalDofsArray));

  if (patch->local_composition_type == PC_COMPOSITE_MULTIPLICATIVE) {
    PetscCall(PetscSectionSetUp(gtolCountsWithArtificial));
    PetscCall(PetscSectionGetStorageSize(gtolCountsWithArtificial, &numGlobalDofsWithArtificial));
    PetscCall(PetscMalloc1(numGlobalDofsWithArtificial, &globalDofsArrayWithArtificial));
  }
  if (isNonlinear) {
    PetscCall(PetscSectionSetUp(gtolCountsWithAll));
    PetscCall(PetscSectionGetStorageSize(gtolCountsWithAll, &numGlobalDofsWithAll));
    PetscCall(PetscMalloc1(numGlobalDofsWithAll, &globalDofsArrayWithAll));
  }
  /* Now populate the global to local map.  This could be merged into the above loop if we were willing to deal with reallocs. */
  for (v = vStart; v < vEnd; ++v) {
    PetscHashIter hi;
    PetscInt      dof, off, Np, ooff, i, j, k, l;

    PetscCall(PetscHMapIClear(ht));
    PetscCall(PetscHMapIClear(htWithArtificial));
    PetscCall(PetscHMapIClear(htWithAll));
    PetscCall(PetscSectionGetDof(cellCounts, v, &dof));
    PetscCall(PetscSectionGetOffset(cellCounts, v, &off));
    PetscCall(PetscSectionGetDof(pointCounts, v, &Np));
    PetscCall(PetscSectionGetOffset(pointCounts, v, &ooff));
    if (dof <= 0) continue;

    for (k = 0; k < patch->nsubspaces; ++k) {
      const PetscInt *cellNodeMap    = patch->cellNodeMap[k];
      PetscInt        nodesPerCell   = patch->nodesPerCell[k];
      PetscInt        subspaceOffset = patch->subspaceOffsets[k];
      PetscInt        bs             = patch->bs[k];
      PetscInt        goff;

      for (i = off; i < off + dof; ++i) {
        /* Reconstruct mapping of global-to-local on this patch. */
        const PetscInt c    = cellsArray[i];
        PetscInt       cell = c;

        if (cellNumbering) PetscCall(PetscSectionGetOffset(cellNumbering, c, &cell));
        for (j = 0; j < nodesPerCell; ++j) {
          for (l = 0; l < bs; ++l) {
            const PetscInt globalDof = cellNodeMap[cell * nodesPerCell + j] * bs + l + subspaceOffset;
            const PetscInt localDof  = dofsArray[key];
            if (localDof >= 0) PetscCall(PetscHMapISet(ht, globalDof, localDof));
            if (patch->local_composition_type == PC_COMPOSITE_MULTIPLICATIVE) {
              const PetscInt localDofWithArtificial = dofsArrayWithArtificial[key];
              if (localDofWithArtificial >= 0) PetscCall(PetscHMapISet(htWithArtificial, globalDof, localDofWithArtificial));
            }
            if (isNonlinear) {
              const PetscInt localDofWithAll = dofsArrayWithAll[key];
              if (localDofWithAll >= 0) PetscCall(PetscHMapISet(htWithAll, globalDof, localDofWithAll));
            }
            key++;
          }
        }
      }

      /* Shove it in the output data structure. */
      PetscCall(PetscSectionGetOffset(gtolCounts, v, &goff));
      PetscHashIterBegin(ht, hi);
      while (!PetscHashIterAtEnd(ht, hi)) {
        PetscInt globalDof, localDof;

        PetscHashIterGetKey(ht, hi, globalDof);
        PetscHashIterGetVal(ht, hi, localDof);
        if (globalDof >= 0) globalDofsArray[goff + localDof] = globalDof;
        PetscHashIterNext(ht, hi);
      }

      if (patch->local_composition_type == PC_COMPOSITE_MULTIPLICATIVE) {
        PetscCall(PetscSectionGetOffset(gtolCountsWithArtificial, v, &goff));
        PetscHashIterBegin(htWithArtificial, hi);
        while (!PetscHashIterAtEnd(htWithArtificial, hi)) {
          PetscInt globalDof, localDof;
          PetscHashIterGetKey(htWithArtificial, hi, globalDof);
          PetscHashIterGetVal(htWithArtificial, hi, localDof);
          if (globalDof >= 0) globalDofsArrayWithArtificial[goff + localDof] = globalDof;
          PetscHashIterNext(htWithArtificial, hi);
        }
      }
      if (isNonlinear) {
        PetscCall(PetscSectionGetOffset(gtolCountsWithAll, v, &goff));
        PetscHashIterBegin(htWithAll, hi);
        while (!PetscHashIterAtEnd(htWithAll, hi)) {
          PetscInt globalDof, localDof;
          PetscHashIterGetKey(htWithAll, hi, globalDof);
          PetscHashIterGetVal(htWithAll, hi, localDof);
          if (globalDof >= 0) globalDofsArrayWithAll[goff + localDof] = globalDof;
          PetscHashIterNext(htWithAll, hi);
        }
      }

      for (p = 0; p < Np; ++p) {
        const PetscInt point = pointsArray[ooff + p];
        PetscInt       globalDof, localDof;

        PetscCall(PCPatchGetGlobalDofs(pc, patch->dofSection, k, patch->combined, point, NULL, &globalDof));
        PetscCall(PetscHMapIGet(ht, globalDof, &localDof));
        offsArray[(ooff + p) * Nf + k] = localDof;
        if (patch->local_composition_type == PC_COMPOSITE_MULTIPLICATIVE) {
          PetscCall(PetscHMapIGet(htWithArtificial, globalDof, &localDof));
          offsArrayWithArtificial[(ooff + p) * Nf + k] = localDof;
        }
        if (isNonlinear) {
          PetscCall(PetscHMapIGet(htWithAll, globalDof, &localDof));
          offsArrayWithAll[(ooff + p) * Nf + k] = localDof;
        }
      }
    }

    PetscCall(PetscHSetIDestroy(&globalBcs));
    PetscCall(PetscHSetIDestroy(&ownedpts));
    PetscCall(PetscHSetIDestroy(&seenpts));
    PetscCall(PetscHSetIDestroy(&owneddofs));
    PetscCall(PetscHSetIDestroy(&seendofs));
    PetscCall(PetscHSetIDestroy(&artificialbcs));

    /* At this point, we have a hash table ht built that maps globalDof -> localDof.
   We need to create the dof table laid out cellwise first, then by subspace,
   as the assembler assembles cell-wise and we need to stuff the different
   contributions of the different function spaces to the right places. So we loop
   over cells, then over subspaces. */
    if (patch->nsubspaces > 1) { /* for nsubspaces = 1, data we need is already in dofsArray */
      for (i = off; i < off + dof; ++i) {
        const PetscInt c    = cellsArray[i];
        PetscInt       cell = c;

        if (cellNumbering) PetscCall(PetscSectionGetOffset(cellNumbering, c, &cell));
        for (k = 0; k < patch->nsubspaces; ++k) {
          const PetscInt *cellNodeMap    = patch->cellNodeMap[k];
          PetscInt        nodesPerCell   = patch->nodesPerCell[k];
          PetscInt        subspaceOffset = patch->subspaceOffsets[k];
          PetscInt        bs             = patch->bs[k];

          for (j = 0; j < nodesPerCell; ++j) {
            for (l = 0; l < bs; ++l) {
              const PetscInt globalDof = cellNodeMap[cell * nodesPerCell + j] * bs + l + subspaceOffset;
              PetscInt       localDof;

              PetscCall(PetscHMapIGet(ht, globalDof, &localDof));
              /* If it's not in the hash table, i.e. is a BC dof,
   then the PetscHSetIMap above gives -1, which matches
   exactly the convention for PETSc's matrix assembly to
   ignore the dof. So we don't need to do anything here */
              asmArray[asmKey] = localDof;
              if (patch->local_composition_type == PC_COMPOSITE_MULTIPLICATIVE) {
                PetscCall(PetscHMapIGet(htWithArtificial, globalDof, &localDof));
                asmArrayWithArtificial[asmKey] = localDof;
              }
              if (isNonlinear) {
                PetscCall(PetscHMapIGet(htWithAll, globalDof, &localDof));
                asmArrayWithAll[asmKey] = localDof;
              }
              asmKey++;
            }
          }
        }
      }
    }
  }
  if (1 == patch->nsubspaces) {
    PetscCall(PetscArraycpy(asmArray, dofsArray, numDofs));
    if (patch->local_composition_type == PC_COMPOSITE_MULTIPLICATIVE) PetscCall(PetscArraycpy(asmArrayWithArtificial, dofsArrayWithArtificial, numDofs));
    if (isNonlinear) PetscCall(PetscArraycpy(asmArrayWithAll, dofsArrayWithAll, numDofs));
  }

  PetscCall(PetscHMapIDestroy(&ht));
  PetscCall(PetscHMapIDestroy(&htWithArtificial));
  PetscCall(PetscHMapIDestroy(&htWithAll));
  PetscCall(ISRestoreIndices(cells, &cellsArray));
  PetscCall(ISRestoreIndices(points, &pointsArray));
  PetscCall(PetscFree(dofsArray));
  if (patch->local_composition_type == PC_COMPOSITE_MULTIPLICATIVE) PetscCall(PetscFree(dofsArrayWithArtificial));
  if (isNonlinear) PetscCall(PetscFree(dofsArrayWithAll));
  /* Create placeholder section for map from points to patch dofs */
  PetscCall(PetscSectionCreate(PETSC_COMM_SELF, &patch->patchSection));
  PetscCall(PetscSectionSetNumFields(patch->patchSection, patch->nsubspaces));
  if (patch->combined) {
    PetscInt numFields;
    PetscCall(PetscSectionGetNumFields(patch->dofSection[0], &numFields));
    PetscCheck(numFields == patch->nsubspaces, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Mismatch between number of section fields %" PetscInt_FMT " and number of subspaces %" PetscInt_FMT, numFields, patch->nsubspaces);
    PetscCall(PetscSectionGetChart(patch->dofSection[0], &pStart, &pEnd));
    PetscCall(PetscSectionSetChart(patch->patchSection, pStart, pEnd));
    for (p = pStart; p < pEnd; ++p) {
      PetscInt dof, fdof, f;

      PetscCall(PetscSectionGetDof(patch->dofSection[0], p, &dof));
      PetscCall(PetscSectionSetDof(patch->patchSection, p, dof));
      for (f = 0; f < patch->nsubspaces; ++f) {
        PetscCall(PetscSectionGetFieldDof(patch->dofSection[0], p, f, &fdof));
        PetscCall(PetscSectionSetFieldDof(patch->patchSection, p, f, fdof));
      }
    }
  } else {
    PetscInt pStartf, pEndf, f;
    pStart = PETSC_INT_MAX;
    pEnd   = PETSC_INT_MIN;
    for (f = 0; f < patch->nsubspaces; ++f) {
      PetscCall(PetscSectionGetChart(patch->dofSection[f], &pStartf, &pEndf));
      pStart = PetscMin(pStart, pStartf);
      pEnd   = PetscMax(pEnd, pEndf);
    }
    PetscCall(PetscSectionSetChart(patch->patchSection, pStart, pEnd));
    for (f = 0; f < patch->nsubspaces; ++f) {
      PetscCall(PetscSectionGetChart(patch->dofSection[f], &pStartf, &pEndf));
      for (p = pStartf; p < pEndf; ++p) {
        PetscInt fdof;
        PetscCall(PetscSectionGetDof(patch->dofSection[f], p, &fdof));
        PetscCall(PetscSectionAddDof(patch->patchSection, p, fdof));
        PetscCall(PetscSectionSetFieldDof(patch->patchSection, p, f, fdof));
      }
    }
  }
  PetscCall(PetscSectionSetUp(patch->patchSection));
  PetscCall(PetscSectionSetUseFieldOffsets(patch->patchSection, PETSC_TRUE));
  /* Replace cell indices with firedrake-numbered ones. */
  PetscCall(ISGeneralSetIndices(cells, numCells, (const PetscInt *)newCellsArray, PETSC_OWN_POINTER));
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, numGlobalDofs, globalDofsArray, PETSC_OWN_POINTER, &patch->gtol));
  PetscCall(PetscObjectSetName((PetscObject)patch->gtol, "Global Indices"));
  PetscCall(PetscSNPrintf(option, PETSC_MAX_PATH_LEN, "-%s_patch_g2l_view", patch->classname));
  PetscCall(PetscSectionViewFromOptions(patch->gtolCounts, (PetscObject)pc, option));
  PetscCall(ISViewFromOptions(patch->gtol, (PetscObject)pc, option));
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, numDofs, asmArray, PETSC_OWN_POINTER, &patch->dofs));
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, numPoints * Nf, offsArray, PETSC_OWN_POINTER, &patch->offs));
  if (patch->local_composition_type == PC_COMPOSITE_MULTIPLICATIVE) {
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, numGlobalDofsWithArtificial, globalDofsArrayWithArtificial, PETSC_OWN_POINTER, &patch->gtolWithArtificial));
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, numDofs, asmArrayWithArtificial, PETSC_OWN_POINTER, &patch->dofsWithArtificial));
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, numPoints * Nf, offsArrayWithArtificial, PETSC_OWN_POINTER, &patch->offsWithArtificial));
  }
  if (isNonlinear) {
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, numGlobalDofsWithAll, globalDofsArrayWithAll, PETSC_OWN_POINTER, &patch->gtolWithAll));
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, numDofs, asmArrayWithAll, PETSC_OWN_POINTER, &patch->dofsWithAll));
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, numPoints * Nf, offsArrayWithAll, PETSC_OWN_POINTER, &patch->offsWithAll));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCPatchCreateMatrix_Private(PC pc, PetscInt point, Mat *mat, PetscBool withArtificial)
{
  PC_PATCH   *patch = (PC_PATCH *)pc->data;
  PetscBool   flg;
  PetscInt    csize, rsize;
  const char *prefix = NULL;

  PetscFunctionBegin;
  if (withArtificial) {
    /* would be nice if we could create a rectangular matrix of size numDofsWithArtificial x numDofs here */
    PetscInt pStart;
    PetscCall(PetscSectionGetChart(patch->gtolCountsWithArtificial, &pStart, NULL));
    PetscCall(PetscSectionGetDof(patch->gtolCountsWithArtificial, point + pStart, &rsize));
    csize = rsize;
  } else {
    PetscInt pStart;
    PetscCall(PetscSectionGetChart(patch->gtolCounts, &pStart, NULL));
    PetscCall(PetscSectionGetDof(patch->gtolCounts, point + pStart, &rsize));
    csize = rsize;
  }

  PetscCall(MatCreate(PETSC_COMM_SELF, mat));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCall(MatSetOptionsPrefix(*mat, prefix));
  PetscCall(MatAppendOptionsPrefix(*mat, "pc_patch_sub_"));
  if (patch->sub_mat_type) PetscCall(MatSetType(*mat, patch->sub_mat_type));
  else if (!patch->sub_mat_type) PetscCall(MatSetType(*mat, MATDENSE));
  PetscCall(MatSetSizes(*mat, rsize, csize, rsize, csize));
  PetscCall(PetscObjectTypeCompare((PetscObject)*mat, MATDENSE, &flg));
  if (!flg) PetscCall(PetscObjectTypeCompare((PetscObject)*mat, MATSEQDENSE, &flg));
  /* Sparse patch matrices */
  if (!flg) {
    PetscBT         bt;
    PetscInt       *dnnz      = NULL;
    const PetscInt *dofsArray = NULL;
    PetscInt        pStart, pEnd, ncell, offset, c, i, j;

    if (withArtificial) {
      PetscCall(ISGetIndices(patch->dofsWithArtificial, &dofsArray));
    } else {
      PetscCall(ISGetIndices(patch->dofs, &dofsArray));
    }
    PetscCall(PetscSectionGetChart(patch->cellCounts, &pStart, &pEnd));
    point += pStart;
    PetscCheck(point < pEnd, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Operator point %" PetscInt_FMT " not in [%" PetscInt_FMT ", %" PetscInt_FMT ")", point, pStart, pEnd);
    PetscCall(PetscSectionGetDof(patch->cellCounts, point, &ncell));
    PetscCall(PetscSectionGetOffset(patch->cellCounts, point, &offset));
    PetscCall(PetscLogEventBegin(PC_Patch_Prealloc, pc, 0, 0, 0));
    /* A PetscBT uses N^2 bits to store the sparsity pattern on a
   * patch. This is probably OK if the patches are not too big,
   * but uses too much memory. We therefore switch based on rsize. */
    if (rsize < 3000) { /* FIXME: I picked this switch value out of my hat */
      PetscScalar *zeroes;
      PetscInt     rows;

      PetscCall(PetscCalloc1(rsize, &dnnz));
      PetscCall(PetscBTCreate(rsize * rsize, &bt));
      for (c = 0; c < ncell; ++c) {
        const PetscInt *idx = dofsArray + (offset + c) * patch->totalDofsPerCell;
        for (i = 0; i < patch->totalDofsPerCell; ++i) {
          const PetscInt row = idx[i];
          if (row < 0) continue;
          for (j = 0; j < patch->totalDofsPerCell; ++j) {
            const PetscInt col = idx[j];
            const PetscInt key = row * rsize + col;
            if (col < 0) continue;
            if (!PetscBTLookupSet(bt, key)) ++dnnz[row];
          }
        }
      }

      if (patch->usercomputeopintfacet) {
        const PetscInt *intFacetsArray = NULL;
        PetscInt        i, numIntFacets, intFacetOffset;
        const PetscInt *facetCells = NULL;

        PetscCall(PetscSectionGetDof(patch->intFacetCounts, point, &numIntFacets));
        PetscCall(PetscSectionGetOffset(patch->intFacetCounts, point, &intFacetOffset));
        PetscCall(ISGetIndices(patch->intFacetsToPatchCell, &facetCells));
        PetscCall(ISGetIndices(patch->intFacets, &intFacetsArray));
        for (i = 0; i < numIntFacets; i++) {
          const PetscInt cell0 = facetCells[2 * (intFacetOffset + i) + 0];
          const PetscInt cell1 = facetCells[2 * (intFacetOffset + i) + 1];
          PetscInt       celli, cellj;

          for (celli = 0; celli < patch->totalDofsPerCell; celli++) {
            const PetscInt row = dofsArray[(offset + cell0) * patch->totalDofsPerCell + celli];
            if (row < 0) continue;
            for (cellj = 0; cellj < patch->totalDofsPerCell; cellj++) {
              const PetscInt col = dofsArray[(offset + cell1) * patch->totalDofsPerCell + cellj];
              const PetscInt key = row * rsize + col;
              if (col < 0) continue;
              if (!PetscBTLookupSet(bt, key)) ++dnnz[row];
            }
          }

          for (celli = 0; celli < patch->totalDofsPerCell; celli++) {
            const PetscInt row = dofsArray[(offset + cell1) * patch->totalDofsPerCell + celli];
            if (row < 0) continue;
            for (cellj = 0; cellj < patch->totalDofsPerCell; cellj++) {
              const PetscInt col = dofsArray[(offset + cell0) * patch->totalDofsPerCell + cellj];
              const PetscInt key = row * rsize + col;
              if (col < 0) continue;
              if (!PetscBTLookupSet(bt, key)) ++dnnz[row];
            }
          }
        }
      }
      PetscCall(PetscBTDestroy(&bt));
      PetscCall(MatXAIJSetPreallocation(*mat, 1, dnnz, NULL, NULL, NULL));
      PetscCall(PetscFree(dnnz));

      PetscCall(PetscCalloc1(patch->totalDofsPerCell * patch->totalDofsPerCell, &zeroes));
      for (c = 0; c < ncell; ++c) {
        const PetscInt *idx = &dofsArray[(offset + c) * patch->totalDofsPerCell];
        PetscCall(MatSetValues(*mat, patch->totalDofsPerCell, idx, patch->totalDofsPerCell, idx, zeroes, INSERT_VALUES));
      }
      PetscCall(MatGetLocalSize(*mat, &rows, NULL));
      for (i = 0; i < rows; ++i) PetscCall(MatSetValues(*mat, 1, &i, 1, &i, zeroes, INSERT_VALUES));

      if (patch->usercomputeopintfacet) {
        const PetscInt *intFacetsArray = NULL;
        PetscInt        i, numIntFacets, intFacetOffset;
        const PetscInt *facetCells = NULL;

        PetscCall(PetscSectionGetDof(patch->intFacetCounts, point, &numIntFacets));
        PetscCall(PetscSectionGetOffset(patch->intFacetCounts, point, &intFacetOffset));
        PetscCall(ISGetIndices(patch->intFacetsToPatchCell, &facetCells));
        PetscCall(ISGetIndices(patch->intFacets, &intFacetsArray));
        for (i = 0; i < numIntFacets; i++) {
          const PetscInt  cell0    = facetCells[2 * (intFacetOffset + i) + 0];
          const PetscInt  cell1    = facetCells[2 * (intFacetOffset + i) + 1];
          const PetscInt *cell0idx = &dofsArray[(offset + cell0) * patch->totalDofsPerCell];
          const PetscInt *cell1idx = &dofsArray[(offset + cell1) * patch->totalDofsPerCell];
          PetscCall(MatSetValues(*mat, patch->totalDofsPerCell, cell0idx, patch->totalDofsPerCell, cell1idx, zeroes, INSERT_VALUES));
          PetscCall(MatSetValues(*mat, patch->totalDofsPerCell, cell1idx, patch->totalDofsPerCell, cell0idx, zeroes, INSERT_VALUES));
        }
      }

      PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));

      PetscCall(PetscFree(zeroes));

    } else { /* rsize too big, use MATPREALLOCATOR */
      Mat          preallocator;
      PetscScalar *vals;

      PetscCall(PetscCalloc1(patch->totalDofsPerCell * patch->totalDofsPerCell, &vals));
      PetscCall(MatCreate(PETSC_COMM_SELF, &preallocator));
      PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
      PetscCall(MatSetSizes(preallocator, rsize, rsize, rsize, rsize));
      PetscCall(MatSetUp(preallocator));

      for (c = 0; c < ncell; ++c) {
        const PetscInt *idx = dofsArray + (offset + c) * patch->totalDofsPerCell;
        PetscCall(MatSetValues(preallocator, patch->totalDofsPerCell, idx, patch->totalDofsPerCell, idx, vals, INSERT_VALUES));
      }

      if (patch->usercomputeopintfacet) {
        const PetscInt *intFacetsArray = NULL;
        PetscInt        i, numIntFacets, intFacetOffset;
        const PetscInt *facetCells = NULL;

        PetscCall(PetscSectionGetDof(patch->intFacetCounts, point, &numIntFacets));
        PetscCall(PetscSectionGetOffset(patch->intFacetCounts, point, &intFacetOffset));
        PetscCall(ISGetIndices(patch->intFacetsToPatchCell, &facetCells));
        PetscCall(ISGetIndices(patch->intFacets, &intFacetsArray));
        for (i = 0; i < numIntFacets; i++) {
          const PetscInt  cell0    = facetCells[2 * (intFacetOffset + i) + 0];
          const PetscInt  cell1    = facetCells[2 * (intFacetOffset + i) + 1];
          const PetscInt *cell0idx = &dofsArray[(offset + cell0) * patch->totalDofsPerCell];
          const PetscInt *cell1idx = &dofsArray[(offset + cell1) * patch->totalDofsPerCell];
          PetscCall(MatSetValues(preallocator, patch->totalDofsPerCell, cell0idx, patch->totalDofsPerCell, cell1idx, vals, INSERT_VALUES));
          PetscCall(MatSetValues(preallocator, patch->totalDofsPerCell, cell1idx, patch->totalDofsPerCell, cell0idx, vals, INSERT_VALUES));
        }
      }

      PetscCall(PetscFree(vals));
      PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
      PetscCall(MatPreallocatorPreallocate(preallocator, PETSC_TRUE, *mat));
      PetscCall(MatDestroy(&preallocator));
    }
    PetscCall(PetscLogEventEnd(PC_Patch_Prealloc, pc, 0, 0, 0));
    if (withArtificial) {
      PetscCall(ISRestoreIndices(patch->dofsWithArtificial, &dofsArray));
    } else {
      PetscCall(ISRestoreIndices(patch->dofs, &dofsArray));
    }
  }
  PetscCall(MatSetUp(*mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCPatchComputeFunction_DMPlex_Private(PC pc, PetscInt patchNum, Vec x, Vec F, IS cellIS, PetscInt n, const PetscInt *l2p, const PetscInt *l2pWithAll, void *ctx)
{
  PC_PATCH       *patch = (PC_PATCH *)pc->data;
  DM              dm, plex;
  PetscSection    s;
  const PetscInt *parray, *oarray;
  PetscInt        Nf = patch->nsubspaces, Np, poff, p, f;

  PetscFunctionBegin;
  PetscCheck(!patch->precomputeElementTensors, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "Precomputing element tensors not implemented with DMPlex compute operator");
  PetscCall(PCGetDM(pc, &dm));
  PetscCall(DMConvert(dm, DMPLEX, &plex));
  dm = plex;
  PetscCall(DMGetLocalSection(dm, &s));
  /* Set offset into patch */
  PetscCall(PetscSectionGetDof(patch->pointCounts, patchNum, &Np));
  PetscCall(PetscSectionGetOffset(patch->pointCounts, patchNum, &poff));
  PetscCall(ISGetIndices(patch->points, &parray));
  PetscCall(ISGetIndices(patch->offs, &oarray));
  for (f = 0; f < Nf; ++f) {
    for (p = 0; p < Np; ++p) {
      const PetscInt point = parray[poff + p];
      PetscInt       dof;

      PetscCall(PetscSectionGetFieldDof(patch->patchSection, point, f, &dof));
      PetscCall(PetscSectionSetFieldOffset(patch->patchSection, point, f, oarray[(poff + p) * Nf + f]));
      if (patch->nsubspaces == 1) PetscCall(PetscSectionSetOffset(patch->patchSection, point, oarray[(poff + p) * Nf + f]));
      else PetscCall(PetscSectionSetOffset(patch->patchSection, point, -1));
    }
  }
  PetscCall(ISRestoreIndices(patch->points, &parray));
  PetscCall(ISRestoreIndices(patch->offs, &oarray));
  if (patch->viewSection) PetscCall(ObjectView((PetscObject)patch->patchSection, patch->viewerSection, patch->formatSection));
  PetscCall(DMPlexComputeResidual_Patch_Internal(dm, patch->patchSection, cellIS, 0.0, x, NULL, F, ctx));
  PetscCall(DMDestroy(&dm));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode PCPatchComputeFunction_Internal(PC pc, Vec x, Vec F, PetscInt point)
{
  PC_PATCH       *patch = (PC_PATCH *)pc->data;
  const PetscInt *dofsArray;
  const PetscInt *dofsArrayWithAll;
  const PetscInt *cellsArray;
  PetscInt        ncell, offset, pStart, pEnd;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(PC_Patch_ComputeOp, pc, 0, 0, 0));
  PetscCheck(patch->usercomputeop, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PCPatchSetComputeOperator() to set callback");
  PetscCall(ISGetIndices(patch->dofs, &dofsArray));
  PetscCall(ISGetIndices(patch->dofsWithAll, &dofsArrayWithAll));
  PetscCall(ISGetIndices(patch->cells, &cellsArray));
  PetscCall(PetscSectionGetChart(patch->cellCounts, &pStart, &pEnd));

  point += pStart;
  PetscCheck(point < pEnd, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Operator point %" PetscInt_FMT " not in [%" PetscInt_FMT ", %" PetscInt_FMT ")", point, pStart, pEnd);

  PetscCall(PetscSectionGetDof(patch->cellCounts, point, &ncell));
  PetscCall(PetscSectionGetOffset(patch->cellCounts, point, &offset));
  if (ncell <= 0) {
    PetscCall(PetscLogEventEnd(PC_Patch_ComputeOp, pc, 0, 0, 0));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(VecSet(F, 0.0));
  /* Cannot reuse the same IS because the geometry info is being cached in it */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncell, cellsArray + offset, PETSC_USE_POINTER, &patch->cellIS));
  PetscCallBack("PCPatch callback", patch->usercomputef(pc, point, x, F, patch->cellIS, ncell * patch->totalDofsPerCell, dofsArray + offset * patch->totalDofsPerCell, dofsArrayWithAll + offset * patch->totalDofsPerCell, patch->usercomputefctx));
  PetscCall(ISDestroy(&patch->cellIS));
  PetscCall(ISRestoreIndices(patch->dofs, &dofsArray));
  PetscCall(ISRestoreIndices(patch->dofsWithAll, &dofsArrayWithAll));
  PetscCall(ISRestoreIndices(patch->cells, &cellsArray));
  if (patch->viewMatrix) {
    char name[PETSC_MAX_PATH_LEN];

    PetscCall(PetscSNPrintf(name, PETSC_MAX_PATH_LEN - 1, "Patch vector for Point %" PetscInt_FMT, point));
    PetscCall(PetscObjectSetName((PetscObject)F, name));
    PetscCall(ObjectView((PetscObject)F, patch->viewerMatrix, patch->formatMatrix));
  }
  PetscCall(PetscLogEventEnd(PC_Patch_ComputeOp, pc, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCPatchComputeOperator_DMPlex_Private(PC pc, PetscInt patchNum, Vec x, Mat J, IS cellIS, PetscInt n, const PetscInt *l2p, const PetscInt *l2pWithAll, void *ctx)
{
  PC_PATCH       *patch = (PC_PATCH *)pc->data;
  DM              dm, plex;
  PetscSection    s;
  const PetscInt *parray, *oarray;
  PetscInt        Nf = patch->nsubspaces, Np, poff, p, f;

  PetscFunctionBegin;
  PetscCall(PCGetDM(pc, &dm));
  PetscCall(DMConvert(dm, DMPLEX, &plex));
  dm = plex;
  PetscCall(DMGetLocalSection(dm, &s));
  /* Set offset into patch */
  PetscCall(PetscSectionGetDof(patch->pointCounts, patchNum, &Np));
  PetscCall(PetscSectionGetOffset(patch->pointCounts, patchNum, &poff));
  PetscCall(ISGetIndices(patch->points, &parray));
  PetscCall(ISGetIndices(patch->offs, &oarray));
  for (f = 0; f < Nf; ++f) {
    for (p = 0; p < Np; ++p) {
      const PetscInt point = parray[poff + p];
      PetscInt       dof;

      PetscCall(PetscSectionGetFieldDof(patch->patchSection, point, f, &dof));
      PetscCall(PetscSectionSetFieldOffset(patch->patchSection, point, f, oarray[(poff + p) * Nf + f]));
      if (patch->nsubspaces == 1) PetscCall(PetscSectionSetOffset(patch->patchSection, point, oarray[(poff + p) * Nf + f]));
      else PetscCall(PetscSectionSetOffset(patch->patchSection, point, -1));
    }
  }
  PetscCall(ISRestoreIndices(patch->points, &parray));
  PetscCall(ISRestoreIndices(patch->offs, &oarray));
  if (patch->viewSection) PetscCall(ObjectView((PetscObject)patch->patchSection, patch->viewerSection, patch->formatSection));
  /* TODO Shut off MatViewFromOptions() in MatAssemblyEnd() here */
  PetscCall(DMPlexComputeJacobian_Patch_Internal(dm, patch->patchSection, patch->patchSection, cellIS, 0.0, 0.0, x, NULL, J, J, ctx));
  PetscCall(DMDestroy(&dm));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* This function zeros mat on entry */
PetscErrorCode PCPatchComputeOperator_Internal(PC pc, Vec x, Mat mat, PetscInt point, PetscBool withArtificial)
{
  PC_PATCH       *patch = (PC_PATCH *)pc->data;
  const PetscInt *dofsArray;
  const PetscInt *dofsArrayWithAll = NULL;
  const PetscInt *cellsArray;
  PetscInt        ncell, offset, pStart, pEnd, numIntFacets, intFacetOffset;
  PetscBool       isNonlinear;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(PC_Patch_ComputeOp, pc, 0, 0, 0));
  isNonlinear = patch->isNonlinear;
  PetscCheck(patch->usercomputeop, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PCPatchSetComputeOperator() to set callback");
  if (withArtificial) {
    PetscCall(ISGetIndices(patch->dofsWithArtificial, &dofsArray));
  } else {
    PetscCall(ISGetIndices(patch->dofs, &dofsArray));
  }
  if (isNonlinear) PetscCall(ISGetIndices(patch->dofsWithAll, &dofsArrayWithAll));
  PetscCall(ISGetIndices(patch->cells, &cellsArray));
  PetscCall(PetscSectionGetChart(patch->cellCounts, &pStart, &pEnd));

  point += pStart;
  PetscCheck(point < pEnd, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Operator point %" PetscInt_FMT " not in [%" PetscInt_FMT ", %" PetscInt_FMT ")", point, pStart, pEnd);

  PetscCall(PetscSectionGetDof(patch->cellCounts, point, &ncell));
  PetscCall(PetscSectionGetOffset(patch->cellCounts, point, &offset));
  if (ncell <= 0) {
    PetscCall(PetscLogEventEnd(PC_Patch_ComputeOp, pc, 0, 0, 0));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(MatZeroEntries(mat));
  if (patch->precomputeElementTensors) {
    PetscInt           i;
    PetscInt           ndof = patch->totalDofsPerCell;
    const PetscScalar *elementTensors;

    PetscCall(VecGetArrayRead(patch->cellMats, &elementTensors));
    for (i = 0; i < ncell; i++) {
      const PetscInt     cell = cellsArray[i + offset];
      const PetscInt    *idx  = dofsArray + (offset + i) * ndof;
      const PetscScalar *v    = elementTensors + patch->precomputedTensorLocations[cell] * ndof * ndof;
      PetscCall(MatSetValues(mat, ndof, idx, ndof, idx, v, ADD_VALUES));
    }
    PetscCall(VecRestoreArrayRead(patch->cellMats, &elementTensors));
    PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  } else {
    /* Cannot reuse the same IS because the geometry info is being cached in it */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncell, cellsArray + offset, PETSC_USE_POINTER, &patch->cellIS));
    PetscCallBack("PCPatch callback",
                  patch->usercomputeop(pc, point, x, mat, patch->cellIS, ncell * patch->totalDofsPerCell, dofsArray + offset * patch->totalDofsPerCell, PetscSafePointerPlusOffset(dofsArrayWithAll, offset * patch->totalDofsPerCell), patch->usercomputeopctx));
  }
  if (patch->usercomputeopintfacet) {
    PetscCall(PetscSectionGetDof(patch->intFacetCounts, point, &numIntFacets));
    PetscCall(PetscSectionGetOffset(patch->intFacetCounts, point, &intFacetOffset));
    if (numIntFacets > 0) {
      /* For each interior facet, grab the two cells (in local numbering, and concatenate dof numberings for those cells) */
      PetscInt       *facetDofs = NULL, *facetDofsWithAll = NULL;
      const PetscInt *intFacetsArray = NULL;
      PetscInt        idx            = 0;
      PetscInt        i, c, d;
      PetscInt        fStart;
      DM              dm, plex;
      IS              facetIS    = NULL;
      const PetscInt *facetCells = NULL;

      PetscCall(ISGetIndices(patch->intFacetsToPatchCell, &facetCells));
      PetscCall(ISGetIndices(patch->intFacets, &intFacetsArray));
      PetscCall(PCGetDM(pc, &dm));
      PetscCall(DMConvert(dm, DMPLEX, &plex));
      dm = plex;
      PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, NULL));
      /* FIXME: Pull this malloc out. */
      PetscCall(PetscMalloc1(2 * patch->totalDofsPerCell * numIntFacets, &facetDofs));
      if (dofsArrayWithAll) PetscCall(PetscMalloc1(2 * patch->totalDofsPerCell * numIntFacets, &facetDofsWithAll));
      if (patch->precomputeElementTensors) {
        PetscInt           nFacetDof = 2 * patch->totalDofsPerCell;
        const PetscScalar *elementTensors;

        PetscCall(VecGetArrayRead(patch->intFacetMats, &elementTensors));

        for (i = 0; i < numIntFacets; i++) {
          const PetscInt     facet = intFacetsArray[i + intFacetOffset];
          const PetscScalar *v     = elementTensors + patch->precomputedIntFacetTensorLocations[facet - fStart] * nFacetDof * nFacetDof;
          idx                      = 0;
          /*
     0--1
     |\-|
     |+\|
     2--3
     [0, 2, 3, 0, 1, 3]
   */
          for (c = 0; c < 2; c++) {
            const PetscInt cell = facetCells[2 * (intFacetOffset + i) + c];
            for (d = 0; d < patch->totalDofsPerCell; d++) {
              facetDofs[idx] = dofsArray[(offset + cell) * patch->totalDofsPerCell + d];
              idx++;
            }
          }
          PetscCall(MatSetValues(mat, nFacetDof, facetDofs, nFacetDof, facetDofs, v, ADD_VALUES));
        }
        PetscCall(VecRestoreArrayRead(patch->intFacetMats, &elementTensors));
      } else {
        /*
     0--1
     |\-|
     |+\|
     2--3
     [0, 2, 3, 0, 1, 3]
   */
        for (i = 0; i < numIntFacets; i++) {
          for (c = 0; c < 2; c++) {
            const PetscInt cell = facetCells[2 * (intFacetOffset + i) + c];
            for (d = 0; d < patch->totalDofsPerCell; d++) {
              facetDofs[idx] = dofsArray[(offset + cell) * patch->totalDofsPerCell + d];
              if (dofsArrayWithAll) facetDofsWithAll[idx] = dofsArrayWithAll[(offset + cell) * patch->totalDofsPerCell + d];
              idx++;
            }
          }
        }
        PetscCall(ISCreateGeneral(PETSC_COMM_SELF, numIntFacets, intFacetsArray + intFacetOffset, PETSC_USE_POINTER, &facetIS));
        PetscCall(patch->usercomputeopintfacet(pc, point, x, mat, facetIS, 2 * numIntFacets * patch->totalDofsPerCell, facetDofs, facetDofsWithAll, patch->usercomputeopintfacetctx));
        PetscCall(ISDestroy(&facetIS));
      }
      PetscCall(ISRestoreIndices(patch->intFacetsToPatchCell, &facetCells));
      PetscCall(ISRestoreIndices(patch->intFacets, &intFacetsArray));
      PetscCall(PetscFree(facetDofs));
      PetscCall(PetscFree(facetDofsWithAll));
      PetscCall(DMDestroy(&dm));
    }
  }

  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));

  if (!(withArtificial || isNonlinear) && patch->denseinverse) {
    MatFactorInfo info;
    PetscBool     flg;
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "Invalid Mat type for dense inverse");
    PetscCall(MatFactorInfoInitialize(&info));
    PetscCall(MatLUFactor(mat, NULL, NULL, &info));
    PetscCall(MatSeqDenseInvertFactors_Private(mat));
  }
  PetscCall(ISDestroy(&patch->cellIS));
  if (withArtificial) {
    PetscCall(ISRestoreIndices(patch->dofsWithArtificial, &dofsArray));
  } else {
    PetscCall(ISRestoreIndices(patch->dofs, &dofsArray));
  }
  if (isNonlinear) PetscCall(ISRestoreIndices(patch->dofsWithAll, &dofsArrayWithAll));
  PetscCall(ISRestoreIndices(patch->cells, &cellsArray));
  if (patch->viewMatrix) {
    char name[PETSC_MAX_PATH_LEN];

    PetscCall(PetscSNPrintf(name, PETSC_MAX_PATH_LEN - 1, "Patch matrix for Point %" PetscInt_FMT, point));
    PetscCall(PetscObjectSetName((PetscObject)mat, name));
    PetscCall(ObjectView((PetscObject)mat, patch->viewerMatrix, patch->formatMatrix));
  }
  PetscCall(PetscLogEventEnd(PC_Patch_ComputeOp, pc, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetValues_PCPatch_Private(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar *v, InsertMode addv)
{
  Vec          data;
  PetscScalar *array;
  PetscInt     bs, nz, i, j, cell;

  PetscCall(MatShellGetContext(mat, &data));
  PetscCall(VecGetBlockSize(data, &bs));
  PetscCall(VecGetSize(data, &nz));
  PetscCall(VecGetArray(data, &array));
  PetscCheck(m == n, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Only for square insertion");
  cell = idxm[0] / bs; /* use the fact that this is called once per cell */
  for (i = 0; i < m; i++) {
    PetscCheck(idxm[i] == idxn[i], PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Row and column indices must match!");
    for (j = 0; j < n; j++) {
      const PetscScalar v_ = v[i * bs + j];
      /* Indexing is special to the data structure we have! */
      if (addv == INSERT_VALUES) {
        array[cell * bs * bs + i * bs + j] = v_;
      } else {
        array[cell * bs * bs + i * bs + j] += v_;
      }
    }
  }
  PetscCall(VecRestoreArray(data, &array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCPatchPrecomputePatchTensors_Private(PC pc)
{
  PC_PATCH       *patch = (PC_PATCH *)pc->data;
  const PetscInt *cellsArray;
  PetscInt        ncell, offset;
  const PetscInt *dofMapArray;
  PetscInt        i, j;
  IS              dofMap;
  IS              cellIS;
  const PetscInt  ndof = patch->totalDofsPerCell;
  Mat             vecMat;
  PetscInt        cStart, cEnd;
  DM              dm, plex;

  PetscCall(ISGetSize(patch->cells, &ncell));
  if (!ncell) { /* No cells to assemble over -> skip */
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(PC_Patch_ComputeOp, pc, 0, 0, 0));

  PetscCall(PCGetDM(pc, &dm));
  PetscCall(DMConvert(dm, DMPLEX, &plex));
  dm = plex;
  if (!patch->allCells) {
    PetscHSetI    cells;
    PetscHashIter hi;
    PetscInt      pStart, pEnd;
    PetscInt     *allCells = NULL;
    PetscCall(PetscHSetICreate(&cells));
    PetscCall(ISGetIndices(patch->cells, &cellsArray));
    PetscCall(PetscSectionGetChart(patch->cellCounts, &pStart, &pEnd));
    for (i = pStart; i < pEnd; i++) {
      PetscCall(PetscSectionGetDof(patch->cellCounts, i, &ncell));
      PetscCall(PetscSectionGetOffset(patch->cellCounts, i, &offset));
      if (ncell <= 0) continue;
      for (j = 0; j < ncell; j++) PetscCall(PetscHSetIAdd(cells, cellsArray[offset + j]));
    }
    PetscCall(ISRestoreIndices(patch->cells, &cellsArray));
    PetscCall(PetscHSetIGetSize(cells, &ncell));
    PetscCall(PetscMalloc1(ncell, &allCells));
    PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd));
    PetscCall(PetscMalloc1(cEnd - cStart, &patch->precomputedTensorLocations));
    i = 0;
    PetscHashIterBegin(cells, hi);
    while (!PetscHashIterAtEnd(cells, hi)) {
      PetscHashIterGetKey(cells, hi, allCells[i]);
      patch->precomputedTensorLocations[allCells[i]] = i;
      PetscHashIterNext(cells, hi);
      i++;
    }
    PetscCall(PetscHSetIDestroy(&cells));
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncell, allCells, PETSC_OWN_POINTER, &patch->allCells));
  }
  PetscCall(ISGetSize(patch->allCells, &ncell));
  if (!patch->cellMats) {
    PetscCall(VecCreateSeq(PETSC_COMM_SELF, ncell * ndof * ndof, &patch->cellMats));
    PetscCall(VecSetBlockSize(patch->cellMats, ndof));
  }
  PetscCall(VecSet(patch->cellMats, 0));

  PetscCall(MatCreateShell(PETSC_COMM_SELF, ncell * ndof, ncell * ndof, ncell * ndof, ncell * ndof, (void *)patch->cellMats, &vecMat));
  PetscCall(MatShellSetOperation(vecMat, MATOP_SET_VALUES, (PetscErrorCodeFn *)MatSetValues_PCPatch_Private));
  PetscCall(ISGetSize(patch->allCells, &ncell));
  PetscCall(ISCreateStride(PETSC_COMM_SELF, ndof * ncell, 0, 1, &dofMap));
  PetscCall(ISGetIndices(dofMap, &dofMapArray));
  PetscCall(ISGetIndices(patch->allCells, &cellsArray));
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncell, cellsArray, PETSC_USE_POINTER, &cellIS));
  /* TODO: Fix for DMPlex compute op, this bypasses a lot of the machinery and just assembles every element tensor. */
  PetscCallBack("PCPatch callback", patch->usercomputeop(pc, -1, NULL, vecMat, cellIS, ndof * ncell, dofMapArray, NULL, patch->usercomputeopctx));
  PetscCall(ISDestroy(&cellIS));
  PetscCall(MatDestroy(&vecMat));
  PetscCall(ISRestoreIndices(patch->allCells, &cellsArray));
  PetscCall(ISRestoreIndices(dofMap, &dofMapArray));
  PetscCall(ISDestroy(&dofMap));

  if (patch->usercomputeopintfacet) {
    PetscInt        nIntFacets;
    IS              intFacetsIS;
    const PetscInt *intFacetsArray = NULL;
    if (!patch->allIntFacets) {
      PetscHSetI    facets;
      PetscHashIter hi;
      PetscInt      pStart, pEnd, fStart, fEnd;
      PetscInt     *allIntFacets = NULL;
      PetscCall(PetscHSetICreate(&facets));
      PetscCall(ISGetIndices(patch->intFacets, &intFacetsArray));
      PetscCall(PetscSectionGetChart(patch->intFacetCounts, &pStart, &pEnd));
      PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
      for (i = pStart; i < pEnd; i++) {
        PetscCall(PetscSectionGetDof(patch->intFacetCounts, i, &nIntFacets));
        PetscCall(PetscSectionGetOffset(patch->intFacetCounts, i, &offset));
        if (nIntFacets <= 0) continue;
        for (j = 0; j < nIntFacets; j++) PetscCall(PetscHSetIAdd(facets, intFacetsArray[offset + j]));
      }
      PetscCall(ISRestoreIndices(patch->intFacets, &intFacetsArray));
      PetscCall(PetscHSetIGetSize(facets, &nIntFacets));
      PetscCall(PetscMalloc1(nIntFacets, &allIntFacets));
      PetscCall(PetscMalloc1(fEnd - fStart, &patch->precomputedIntFacetTensorLocations));
      i = 0;
      PetscHashIterBegin(facets, hi);
      while (!PetscHashIterAtEnd(facets, hi)) {
        PetscHashIterGetKey(facets, hi, allIntFacets[i]);
        patch->precomputedIntFacetTensorLocations[allIntFacets[i] - fStart] = i;
        PetscHashIterNext(facets, hi);
        i++;
      }
      PetscCall(PetscHSetIDestroy(&facets));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nIntFacets, allIntFacets, PETSC_OWN_POINTER, &patch->allIntFacets));
    }
    PetscCall(ISGetSize(patch->allIntFacets, &nIntFacets));
    if (!patch->intFacetMats) {
      PetscCall(VecCreateSeq(PETSC_COMM_SELF, nIntFacets * ndof * ndof * 4, &patch->intFacetMats));
      PetscCall(VecSetBlockSize(patch->intFacetMats, ndof * 2));
    }
    PetscCall(VecSet(patch->intFacetMats, 0));

    PetscCall(MatCreateShell(PETSC_COMM_SELF, nIntFacets * ndof * 2, nIntFacets * ndof * 2, nIntFacets * ndof * 2, nIntFacets * ndof * 2, (void *)patch->intFacetMats, &vecMat));
    PetscCall(MatShellSetOperation(vecMat, MATOP_SET_VALUES, (PetscErrorCodeFn *)MatSetValues_PCPatch_Private));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, 2 * ndof * nIntFacets, 0, 1, &dofMap));
    PetscCall(ISGetIndices(dofMap, &dofMapArray));
    PetscCall(ISGetIndices(patch->allIntFacets, &intFacetsArray));
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nIntFacets, intFacetsArray, PETSC_USE_POINTER, &intFacetsIS));
    /* TODO: Fix for DMPlex compute op, this bypasses a lot of the machinery and just assembles every element tensor. */
    PetscCallBack("PCPatch callback (interior facets)", patch->usercomputeopintfacet(pc, -1, NULL, vecMat, intFacetsIS, 2 * ndof * nIntFacets, dofMapArray, NULL, patch->usercomputeopintfacetctx));
    PetscCall(ISDestroy(&intFacetsIS));
    PetscCall(MatDestroy(&vecMat));
    PetscCall(ISRestoreIndices(patch->allIntFacets, &intFacetsArray));
    PetscCall(ISRestoreIndices(dofMap, &dofMapArray));
    PetscCall(ISDestroy(&dofMap));
  }
  PetscCall(DMDestroy(&dm));
  PetscCall(PetscLogEventEnd(PC_Patch_ComputeOp, pc, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode PCPatch_ScatterLocal_Private(PC pc, PetscInt p, Vec x, Vec y, InsertMode mode, ScatterMode scat, PatchScatterType scattertype)
{
  PC_PATCH          *patch     = (PC_PATCH *)pc->data;
  const PetscScalar *xArray    = NULL;
  PetscScalar       *yArray    = NULL;
  const PetscInt    *gtolArray = NULL;
  PetscInt           dof, offset, lidx;

  PetscFunctionBeginHot;
  PetscCall(VecGetArrayRead(x, &xArray));
  PetscCall(VecGetArray(y, &yArray));
  if (scattertype == SCATTER_WITHARTIFICIAL) {
    PetscCall(PetscSectionGetDof(patch->gtolCountsWithArtificial, p, &dof));
    PetscCall(PetscSectionGetOffset(patch->gtolCountsWithArtificial, p, &offset));
    PetscCall(ISGetIndices(patch->gtolWithArtificial, &gtolArray));
  } else if (scattertype == SCATTER_WITHALL) {
    PetscCall(PetscSectionGetDof(patch->gtolCountsWithAll, p, &dof));
    PetscCall(PetscSectionGetOffset(patch->gtolCountsWithAll, p, &offset));
    PetscCall(ISGetIndices(patch->gtolWithAll, &gtolArray));
  } else {
    PetscCall(PetscSectionGetDof(patch->gtolCounts, p, &dof));
    PetscCall(PetscSectionGetOffset(patch->gtolCounts, p, &offset));
    PetscCall(ISGetIndices(patch->gtol, &gtolArray));
  }
  PetscCheck(mode != INSERT_VALUES || scat == SCATTER_FORWARD, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Can't insert if not scattering forward");
  PetscCheck(mode != ADD_VALUES || scat == SCATTER_REVERSE, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Can't add if not scattering reverse");
  for (lidx = 0; lidx < dof; ++lidx) {
    const PetscInt gidx = gtolArray[offset + lidx];

    if (mode == INSERT_VALUES) yArray[lidx] = xArray[gidx]; /* Forward */
    else yArray[gidx] += xArray[lidx];                      /* Reverse */
  }
  if (scattertype == SCATTER_WITHARTIFICIAL) {
    PetscCall(ISRestoreIndices(patch->gtolWithArtificial, &gtolArray));
  } else if (scattertype == SCATTER_WITHALL) {
    PetscCall(ISRestoreIndices(patch->gtolWithAll, &gtolArray));
  } else {
    PetscCall(ISRestoreIndices(patch->gtol, &gtolArray));
  }
  PetscCall(VecRestoreArrayRead(x, &xArray));
  PetscCall(VecRestoreArray(y, &yArray));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSetUp_PATCH_Linear(PC pc)
{
  PC_PATCH   *patch = (PC_PATCH *)pc->data;
  const char *prefix;
  PetscInt    i;

  PetscFunctionBegin;
  if (!pc->setupcalled) {
    PetscCheck(patch->save_operators || !patch->denseinverse, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "Can't have dense inverse without save operators");
    if (!patch->denseinverse) {
      PetscCall(PetscMalloc1(patch->npatch, &patch->solver));
      PetscCall(PCGetOptionsPrefix(pc, &prefix));
      for (i = 0; i < patch->npatch; ++i) {
        KSP ksp;
        PC  subpc;

        PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
        PetscCall(KSPSetNestLevel(ksp, pc->kspnestlevel));
        PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
        PetscCall(KSPSetOptionsPrefix(ksp, prefix));
        PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));
        PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
        PetscCall(KSPGetPC(ksp, &subpc));
        PetscCall(PetscObjectIncrementTabLevel((PetscObject)subpc, (PetscObject)pc, 1));
        patch->solver[i] = (PetscObject)ksp;
      }
    }
  }
  if (patch->save_operators) {
    if (patch->precomputeElementTensors) PetscCall(PCPatchPrecomputePatchTensors_Private(pc));
    for (i = 0; i < patch->npatch; ++i) {
      PetscCall(PCPatchComputeOperator_Internal(pc, NULL, patch->mat[i], i, PETSC_FALSE));
      if (!patch->denseinverse) {
        PetscCall(KSPSetOperators((KSP)patch->solver[i], patch->mat[i], patch->mat[i]));
      } else if (patch->mat[i] && !patch->densesolve) {
        /* Setup matmult callback */
        PetscCall(MatGetOperation(patch->mat[i], MATOP_MULT, (PetscErrorCodeFn **)&patch->densesolve));
      }
    }
  }
  if (patch->local_composition_type == PC_COMPOSITE_MULTIPLICATIVE) {
    for (i = 0; i < patch->npatch; ++i) {
      /* Instead of padding patch->patchUpdate with zeros to get */
      /* patch->patchUpdateWithArtificial and then multiplying with the matrix, */
      /* just get rid of the columns that correspond to the dofs with */
      /* artificial bcs. That's of course fairly inefficient, hopefully we */
      /* can just assemble the rectangular matrix in the first place. */
      Mat      matSquare;
      IS       rowis;
      PetscInt dof;

      PetscCall(MatGetSize(patch->mat[i], &dof, NULL));
      if (dof == 0) {
        patch->matWithArtificial[i] = NULL;
        continue;
      }

      PetscCall(PCPatchCreateMatrix_Private(pc, i, &matSquare, PETSC_TRUE));
      PetscCall(PCPatchComputeOperator_Internal(pc, NULL, matSquare, i, PETSC_TRUE));

      PetscCall(MatGetSize(matSquare, &dof, NULL));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, dof, 0, 1, &rowis));
      if (pc->setupcalled) {
        PetscCall(MatCreateSubMatrix(matSquare, rowis, patch->dofMappingWithoutToWithArtificial[i], MAT_REUSE_MATRIX, &patch->matWithArtificial[i]));
      } else {
        PetscCall(MatCreateSubMatrix(matSquare, rowis, patch->dofMappingWithoutToWithArtificial[i], MAT_INITIAL_MATRIX, &patch->matWithArtificial[i]));
      }
      PetscCall(ISDestroy(&rowis));
      PetscCall(MatDestroy(&matSquare));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSetUp_PATCH(PC pc)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;
  PetscInt  i;
  PetscBool isNonlinear;
  PetscInt  maxDof = -1, maxDofWithArtificial = -1;

  PetscFunctionBegin;
  if (!pc->setupcalled) {
    PetscInt pStart, pEnd, p;
    PetscInt localSize;

    PetscCall(PetscLogEventBegin(PC_Patch_CreatePatches, pc, 0, 0, 0));

    isNonlinear = patch->isNonlinear;
    if (!patch->nsubspaces) {
      DM           dm, plex;
      PetscSection s;
      PetscInt     cStart, cEnd, c, Nf, f, numGlobalBcs = 0, *globalBcs, *Nb, **cellDofs;

      PetscCall(PCGetDM(pc, &dm));
      PetscCheck(dm, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Must set DM for PCPATCH or call PCPatchSetDiscretisationInfo()");
      PetscCall(DMConvert(dm, DMPLEX, &plex));
      dm = plex;
      PetscCall(DMGetLocalSection(dm, &s));
      PetscCall(PetscSectionGetNumFields(s, &Nf));
      PetscCall(PetscSectionGetChart(s, &pStart, &pEnd));
      for (p = pStart; p < pEnd; ++p) {
        PetscInt cdof;
        PetscCall(PetscSectionGetConstraintDof(s, p, &cdof));
        numGlobalBcs += cdof;
      }
      PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd));
      PetscCall(PetscMalloc3(Nf, &Nb, Nf, &cellDofs, numGlobalBcs, &globalBcs));
      for (f = 0; f < Nf; ++f) {
        PetscFE        fe;
        PetscDualSpace sp;
        PetscInt       cdoff = 0;

        PetscCall(DMGetField(dm, f, NULL, (PetscObject *)&fe));
        /* PetscCall(PetscFEGetNumComponents(fe, &Nc[f])); */
        PetscCall(PetscFEGetDualSpace(fe, &sp));
        PetscCall(PetscDualSpaceGetDimension(sp, &Nb[f]));

        PetscCall(PetscMalloc1((cEnd - cStart) * Nb[f], &cellDofs[f]));
        for (c = cStart; c < cEnd; ++c) {
          PetscInt *closure = NULL;
          PetscInt  clSize  = 0, cl;

          PetscCall(DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &clSize, &closure));
          for (cl = 0; cl < clSize * 2; cl += 2) {
            const PetscInt p = closure[cl];
            PetscInt       fdof, d, foff;

            PetscCall(PetscSectionGetFieldDof(s, p, f, &fdof));
            PetscCall(PetscSectionGetFieldOffset(s, p, f, &foff));
            for (d = 0; d < fdof; ++d, ++cdoff) cellDofs[f][cdoff] = foff + d;
          }
          PetscCall(DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &clSize, &closure));
        }
        PetscCheck(cdoff == (cEnd - cStart) * Nb[f], PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_SIZ, "Total number of cellDofs %" PetscInt_FMT " for field %" PetscInt_FMT " should be Nc (%" PetscInt_FMT ") * cellDof (%" PetscInt_FMT ")", cdoff, f, cEnd - cStart, Nb[f]);
      }
      numGlobalBcs = 0;
      for (p = pStart; p < pEnd; ++p) {
        const PetscInt *ind;
        PetscInt        off, cdof, d;

        PetscCall(PetscSectionGetOffset(s, p, &off));
        PetscCall(PetscSectionGetConstraintDof(s, p, &cdof));
        PetscCall(PetscSectionGetConstraintIndices(s, p, &ind));
        for (d = 0; d < cdof; ++d) globalBcs[numGlobalBcs++] = off + ind[d];
      }

      PetscCall(PCPatchSetDiscretisationInfoCombined(pc, dm, Nb, (const PetscInt **)cellDofs, numGlobalBcs, globalBcs, numGlobalBcs, globalBcs));
      for (f = 0; f < Nf; ++f) PetscCall(PetscFree(cellDofs[f]));
      PetscCall(PetscFree3(Nb, cellDofs, globalBcs));
      PetscCall(PCPatchSetComputeFunction(pc, PCPatchComputeFunction_DMPlex_Private, NULL));
      PetscCall(PCPatchSetComputeOperator(pc, PCPatchComputeOperator_DMPlex_Private, NULL));
      PetscCall(DMDestroy(&dm));
    }

    localSize = patch->subspaceOffsets[patch->nsubspaces];
    PetscCall(VecCreateSeq(PETSC_COMM_SELF, localSize, &patch->localRHS));
    PetscCall(VecSetUp(patch->localRHS));
    PetscCall(VecDuplicate(patch->localRHS, &patch->localUpdate));
    PetscCall(PCPatchCreateCellPatches(pc));
    PetscCall(PCPatchCreateCellPatchDiscretisationInfo(pc));

    /* OK, now build the work vectors */
    PetscCall(PetscSectionGetChart(patch->gtolCounts, &pStart, &pEnd));

    if (patch->local_composition_type == PC_COMPOSITE_MULTIPLICATIVE) PetscCall(PetscMalloc1(patch->npatch, &patch->dofMappingWithoutToWithArtificial));
    if (isNonlinear) PetscCall(PetscMalloc1(patch->npatch, &patch->dofMappingWithoutToWithAll));
    for (p = pStart; p < pEnd; ++p) {
      PetscInt dof;

      PetscCall(PetscSectionGetDof(patch->gtolCounts, p, &dof));
      maxDof = PetscMax(maxDof, dof);
      if (patch->local_composition_type == PC_COMPOSITE_MULTIPLICATIVE) {
        const PetscInt *gtolArray, *gtolArrayWithArtificial = NULL;
        PetscInt        numPatchDofs, offset;
        PetscInt        numPatchDofsWithArtificial, offsetWithArtificial;
        PetscInt        dofWithoutArtificialCounter = 0;
        PetscInt       *patchWithoutArtificialToWithArtificialArray;

        PetscCall(PetscSectionGetDof(patch->gtolCountsWithArtificial, p, &dof));
        maxDofWithArtificial = PetscMax(maxDofWithArtificial, dof);

        /* Now build the mapping that for a dof in a patch WITHOUT dofs that have artificial bcs gives the */
        /* the index in the patch with all dofs */
        PetscCall(ISGetIndices(patch->gtol, &gtolArray));

        PetscCall(PetscSectionGetDof(patch->gtolCounts, p, &numPatchDofs));
        if (numPatchDofs == 0) {
          patch->dofMappingWithoutToWithArtificial[p - pStart] = NULL;
          continue;
        }

        PetscCall(PetscSectionGetOffset(patch->gtolCounts, p, &offset));
        PetscCall(ISGetIndices(patch->gtolWithArtificial, &gtolArrayWithArtificial));
        PetscCall(PetscSectionGetDof(patch->gtolCountsWithArtificial, p, &numPatchDofsWithArtificial));
        PetscCall(PetscSectionGetOffset(patch->gtolCountsWithArtificial, p, &offsetWithArtificial));

        PetscCall(PetscMalloc1(numPatchDofs, &patchWithoutArtificialToWithArtificialArray));
        for (i = 0; i < numPatchDofsWithArtificial; i++) {
          if (gtolArrayWithArtificial[i + offsetWithArtificial] == gtolArray[offset + dofWithoutArtificialCounter]) {
            patchWithoutArtificialToWithArtificialArray[dofWithoutArtificialCounter] = i;
            dofWithoutArtificialCounter++;
            if (dofWithoutArtificialCounter == numPatchDofs) break;
          }
        }
        PetscCall(ISCreateGeneral(PETSC_COMM_SELF, numPatchDofs, patchWithoutArtificialToWithArtificialArray, PETSC_OWN_POINTER, &patch->dofMappingWithoutToWithArtificial[p - pStart]));
        PetscCall(ISRestoreIndices(patch->gtol, &gtolArray));
        PetscCall(ISRestoreIndices(patch->gtolWithArtificial, &gtolArrayWithArtificial));
      }
    }
    for (p = pStart; p < pEnd; ++p) {
      if (isNonlinear) {
        const PetscInt *gtolArray, *gtolArrayWithAll = NULL;
        PetscInt        numPatchDofs, offset;
        PetscInt        numPatchDofsWithAll, offsetWithAll;
        PetscInt        dofWithoutAllCounter = 0;
        PetscInt       *patchWithoutAllToWithAllArray;

        /* Now build the mapping that for a dof in a patch WITHOUT dofs that have artificial bcs gives the */
        /* the index in the patch with all dofs */
        PetscCall(ISGetIndices(patch->gtol, &gtolArray));

        PetscCall(PetscSectionGetDof(patch->gtolCounts, p, &numPatchDofs));
        if (numPatchDofs == 0) {
          patch->dofMappingWithoutToWithAll[p - pStart] = NULL;
          continue;
        }

        PetscCall(PetscSectionGetOffset(patch->gtolCounts, p, &offset));
        PetscCall(ISGetIndices(patch->gtolWithAll, &gtolArrayWithAll));
        PetscCall(PetscSectionGetDof(patch->gtolCountsWithAll, p, &numPatchDofsWithAll));
        PetscCall(PetscSectionGetOffset(patch->gtolCountsWithAll, p, &offsetWithAll));

        PetscCall(PetscMalloc1(numPatchDofs, &patchWithoutAllToWithAllArray));

        for (i = 0; i < numPatchDofsWithAll; i++) {
          if (gtolArrayWithAll[i + offsetWithAll] == gtolArray[offset + dofWithoutAllCounter]) {
            patchWithoutAllToWithAllArray[dofWithoutAllCounter] = i;
            dofWithoutAllCounter++;
            if (dofWithoutAllCounter == numPatchDofs) break;
          }
        }
        PetscCall(ISCreateGeneral(PETSC_COMM_SELF, numPatchDofs, patchWithoutAllToWithAllArray, PETSC_OWN_POINTER, &patch->dofMappingWithoutToWithAll[p - pStart]));
        PetscCall(ISRestoreIndices(patch->gtol, &gtolArray));
        PetscCall(ISRestoreIndices(patch->gtolWithAll, &gtolArrayWithAll));
      }
    }
    if (patch->local_composition_type == PC_COMPOSITE_MULTIPLICATIVE) {
      PetscCall(VecCreateSeq(PETSC_COMM_SELF, maxDofWithArtificial, &patch->patchRHSWithArtificial));
      PetscCall(VecSetUp(patch->patchRHSWithArtificial));
    }
    PetscCall(VecCreateSeq(PETSC_COMM_SELF, maxDof, &patch->patchRHS));
    PetscCall(VecSetUp(patch->patchRHS));
    PetscCall(VecCreateSeq(PETSC_COMM_SELF, maxDof, &patch->patchUpdate));
    PetscCall(VecSetUp(patch->patchUpdate));
    if (patch->save_operators) {
      PetscCall(PetscMalloc1(patch->npatch, &patch->mat));
      for (i = 0; i < patch->npatch; ++i) PetscCall(PCPatchCreateMatrix_Private(pc, i, &patch->mat[i], PETSC_FALSE));
    }
    PetscCall(PetscLogEventEnd(PC_Patch_CreatePatches, pc, 0, 0, 0));

    /* If desired, calculate weights for dof multiplicity */
    if (patch->partition_of_unity) {
      PetscScalar *input  = NULL;
      PetscScalar *output = NULL;
      Vec          global;

      PetscCall(VecDuplicate(patch->localRHS, &patch->dof_weights));
      if (patch->local_composition_type == PC_COMPOSITE_ADDITIVE) {
        for (i = 0; i < patch->npatch; ++i) {
          PetscInt dof;

          PetscCall(PetscSectionGetDof(patch->gtolCounts, i + pStart, &dof));
          if (dof <= 0) continue;
          PetscCall(VecSet(patch->patchRHS, 1.0));
          PetscCall(PCPatch_ScatterLocal_Private(pc, i + pStart, patch->patchRHS, patch->dof_weights, ADD_VALUES, SCATTER_REVERSE, SCATTER_INTERIOR));
        }
      } else {
        /* multiplicative is actually only locally multiplicative and globally additive. need the pou where the mesh decomposition overlaps */
        PetscCall(VecSet(patch->dof_weights, 1.0));
      }

      PetscCall(VecDuplicate(patch->dof_weights, &global));
      PetscCall(VecSet(global, 0.));

      PetscCall(VecGetArray(patch->dof_weights, &input));
      PetscCall(VecGetArray(global, &output));
      PetscCall(PetscSFReduceBegin(patch->sectionSF, MPIU_SCALAR, input, output, MPI_SUM));
      PetscCall(PetscSFReduceEnd(patch->sectionSF, MPIU_SCALAR, input, output, MPI_SUM));
      PetscCall(VecRestoreArray(patch->dof_weights, &input));
      PetscCall(VecRestoreArray(global, &output));

      PetscCall(VecReciprocal(global));

      PetscCall(VecGetArray(patch->dof_weights, &output));
      PetscCall(VecGetArray(global, &input));
      PetscCall(PetscSFBcastBegin(patch->sectionSF, MPIU_SCALAR, input, output, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(patch->sectionSF, MPIU_SCALAR, input, output, MPI_REPLACE));
      PetscCall(VecRestoreArray(patch->dof_weights, &output));
      PetscCall(VecRestoreArray(global, &input));
      PetscCall(VecDestroy(&global));
    }
    if (patch->local_composition_type == PC_COMPOSITE_MULTIPLICATIVE && patch->save_operators && !patch->isNonlinear) PetscCall(PetscMalloc1(patch->npatch, &patch->matWithArtificial));
  }
  PetscCall((*patch->setupsolver)(pc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApply_PATCH_Linear(PC pc, PetscInt i, Vec x, Vec y)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;
  KSP       ksp;
  Mat       op;
  PetscInt  m, n;

  PetscFunctionBegin;
  if (patch->denseinverse) {
    PetscCall((*patch->densesolve)(patch->mat[i], x, y));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  ksp = (KSP)patch->solver[i];
  if (!patch->save_operators) {
    Mat mat;

    PetscCall(PCPatchCreateMatrix_Private(pc, i, &mat, PETSC_FALSE));
    /* Populate operator here. */
    PetscCall(PCPatchComputeOperator_Internal(pc, NULL, mat, i, PETSC_FALSE));
    PetscCall(KSPSetOperators(ksp, mat, mat));
    /* Drop reference so the KSPSetOperators below will blow it away. */
    PetscCall(MatDestroy(&mat));
  }
  PetscCall(PetscLogEventBegin(PC_Patch_Solve, pc, 0, 0, 0));
  if (!ksp->setfromoptionscalled) PetscCall(KSPSetFromOptions(ksp));
  /* Disgusting trick to reuse work vectors */
  PetscCall(KSPGetOperators(ksp, &op, NULL));
  PetscCall(MatGetLocalSize(op, &m, &n));
  x->map->n = m;
  y->map->n = n;
  x->map->N = m;
  y->map->N = n;
  PetscCall(KSPSolve(ksp, x, y));
  PetscCall(KSPCheckSolve(ksp, pc, y));
  PetscCall(PetscLogEventEnd(PC_Patch_Solve, pc, 0, 0, 0));
  if (!patch->save_operators) {
    PC pc;
    PetscCall(KSPSetOperators(ksp, NULL, NULL));
    PetscCall(KSPGetPC(ksp, &pc));
    /* Destroy PC context too, otherwise the factored matrix hangs around. */
    PetscCall(PCReset(pc));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCUpdateMultiplicative_PATCH_Linear(PC pc, PetscInt i, PetscInt pStart)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;
  Mat       multMat;
  PetscInt  n, m;

  PetscFunctionBegin;
  if (patch->save_operators) {
    multMat = patch->matWithArtificial[i];
  } else {
    /*Very inefficient, hopefully we can just assemble the rectangular matrix in the first place.*/
    Mat      matSquare;
    PetscInt dof;
    IS       rowis;
    PetscCall(PCPatchCreateMatrix_Private(pc, i, &matSquare, PETSC_TRUE));
    PetscCall(PCPatchComputeOperator_Internal(pc, NULL, matSquare, i, PETSC_TRUE));
    PetscCall(MatGetSize(matSquare, &dof, NULL));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, dof, 0, 1, &rowis));
    PetscCall(MatCreateSubMatrix(matSquare, rowis, patch->dofMappingWithoutToWithArtificial[i], MAT_INITIAL_MATRIX, &multMat));
    PetscCall(MatDestroy(&matSquare));
    PetscCall(ISDestroy(&rowis));
  }
  /* Disgusting trick to reuse work vectors */
  PetscCall(MatGetLocalSize(multMat, &m, &n));
  patch->patchUpdate->map->n            = n;
  patch->patchRHSWithArtificial->map->n = m;
  patch->patchUpdate->map->N            = n;
  patch->patchRHSWithArtificial->map->N = m;
  PetscCall(MatMult(multMat, patch->patchUpdate, patch->patchRHSWithArtificial));
  PetscCall(VecScale(patch->patchRHSWithArtificial, -1.0));
  PetscCall(PCPatch_ScatterLocal_Private(pc, i + pStart, patch->patchRHSWithArtificial, patch->localRHS, ADD_VALUES, SCATTER_REVERSE, SCATTER_WITHARTIFICIAL));
  if (!patch->save_operators) PetscCall(MatDestroy(&multMat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApply_PATCH(PC pc, Vec x, Vec y)
{
  PC_PATCH          *patch        = (PC_PATCH *)pc->data;
  const PetscScalar *globalRHS    = NULL;
  PetscScalar       *localRHS     = NULL;
  PetscScalar       *globalUpdate = NULL;
  const PetscInt    *bcNodes      = NULL;
  PetscInt           nsweep       = patch->symmetrise_sweep ? 2 : 1;
  PetscInt           start[2]     = {0, 0};
  PetscInt           end[2]       = {-1, -1};
  const PetscInt     inc[2]       = {1, -1};
  const PetscScalar *localUpdate;
  const PetscInt    *iterationSet;
  PetscInt           pStart, numBcs, n, sweep, bc, j;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(PC_Patch_Apply, pc, 0, 0, 0));
  PetscCall(PetscOptionsPushCreateViewerOff(PETSC_TRUE));
  /* start, end, inc have 2 entries to manage a second backward sweep if we symmetrize */
  end[0]   = patch->npatch;
  start[1] = patch->npatch - 1;
  if (patch->user_patches) {
    PetscCall(ISGetLocalSize(patch->iterationSet, &end[0]));
    start[1] = end[0] - 1;
    PetscCall(ISGetIndices(patch->iterationSet, &iterationSet));
  }
  /* Scatter from global space into overlapped local spaces */
  PetscCall(VecGetArrayRead(x, &globalRHS));
  PetscCall(VecGetArray(patch->localRHS, &localRHS));
  PetscCall(PetscSFBcastBegin(patch->sectionSF, MPIU_SCALAR, globalRHS, localRHS, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(patch->sectionSF, MPIU_SCALAR, globalRHS, localRHS, MPI_REPLACE));
  PetscCall(VecRestoreArrayRead(x, &globalRHS));
  PetscCall(VecRestoreArray(patch->localRHS, &localRHS));

  PetscCall(VecSet(patch->localUpdate, 0.0));
  PetscCall(PetscSectionGetChart(patch->gtolCounts, &pStart, NULL));
  PetscCall(PetscLogEventBegin(PC_Patch_Solve, pc, 0, 0, 0));
  for (sweep = 0; sweep < nsweep; sweep++) {
    for (j = start[sweep]; j * inc[sweep] < end[sweep] * inc[sweep]; j += inc[sweep]) {
      PetscInt i = patch->user_patches ? iterationSet[j] : j;
      PetscInt start, len;

      PetscCall(PetscSectionGetDof(patch->gtolCounts, i + pStart, &len));
      PetscCall(PetscSectionGetOffset(patch->gtolCounts, i + pStart, &start));
      /* TODO: Squash out these guys in the setup as well. */
      if (len <= 0) continue;
      /* TODO: Do we need different scatters for X and Y? */
      PetscCall(PCPatch_ScatterLocal_Private(pc, i + pStart, patch->localRHS, patch->patchRHS, INSERT_VALUES, SCATTER_FORWARD, SCATTER_INTERIOR));
      PetscCall((*patch->applysolver)(pc, i, patch->patchRHS, patch->patchUpdate));
      PetscCall(PCPatch_ScatterLocal_Private(pc, i + pStart, patch->patchUpdate, patch->localUpdate, ADD_VALUES, SCATTER_REVERSE, SCATTER_INTERIOR));
      if (patch->local_composition_type == PC_COMPOSITE_MULTIPLICATIVE) PetscCall((*patch->updatemultiplicative)(pc, i, pStart));
    }
  }
  PetscCall(PetscLogEventEnd(PC_Patch_Solve, pc, 0, 0, 0));
  if (patch->user_patches) PetscCall(ISRestoreIndices(patch->iterationSet, &iterationSet));
  /* XXX: should we do this on the global vector? */
  if (patch->partition_of_unity) PetscCall(VecPointwiseMult(patch->localUpdate, patch->localUpdate, patch->dof_weights));
  /* Now patch->localUpdate contains the solution of the patch solves, so we need to combine them all. */
  PetscCall(VecSet(y, 0.0));
  PetscCall(VecGetArray(y, &globalUpdate));
  PetscCall(VecGetArrayRead(patch->localUpdate, &localUpdate));
  PetscCall(PetscSFReduceBegin(patch->sectionSF, MPIU_SCALAR, localUpdate, globalUpdate, MPI_SUM));
  PetscCall(PetscSFReduceEnd(patch->sectionSF, MPIU_SCALAR, localUpdate, globalUpdate, MPI_SUM));
  PetscCall(VecRestoreArrayRead(patch->localUpdate, &localUpdate));

  /* Now we need to send the global BC values through */
  PetscCall(VecGetArrayRead(x, &globalRHS));
  PetscCall(ISGetSize(patch->globalBcNodes, &numBcs));
  PetscCall(ISGetIndices(patch->globalBcNodes, &bcNodes));
  PetscCall(VecGetLocalSize(x, &n));
  for (bc = 0; bc < numBcs; ++bc) {
    const PetscInt idx = bcNodes[bc];
    if (idx < n) globalUpdate[idx] = globalRHS[idx];
  }

  PetscCall(ISRestoreIndices(patch->globalBcNodes, &bcNodes));
  PetscCall(VecRestoreArrayRead(x, &globalRHS));
  PetscCall(VecRestoreArray(y, &globalUpdate));

  PetscCall(PetscOptionsPopCreateViewerOff());
  PetscCall(PetscLogEventEnd(PC_Patch_Apply, pc, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCReset_PATCH_Linear(PC pc)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;
  PetscInt  i;

  PetscFunctionBegin;
  if (patch->solver) {
    for (i = 0; i < patch->npatch; ++i) PetscCall(KSPReset((KSP)patch->solver[i]));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCReset_PATCH(PC pc)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;
  PetscInt  i;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&patch->sectionSF));
  PetscCall(PetscSectionDestroy(&patch->cellCounts));
  PetscCall(PetscSectionDestroy(&patch->pointCounts));
  PetscCall(PetscSectionDestroy(&patch->cellNumbering));
  PetscCall(PetscSectionDestroy(&patch->gtolCounts));
  PetscCall(ISDestroy(&patch->gtol));
  PetscCall(ISDestroy(&patch->cells));
  PetscCall(ISDestroy(&patch->points));
  PetscCall(ISDestroy(&patch->dofs));
  PetscCall(ISDestroy(&patch->offs));
  PetscCall(PetscSectionDestroy(&patch->patchSection));
  PetscCall(ISDestroy(&patch->ghostBcNodes));
  PetscCall(ISDestroy(&patch->globalBcNodes));
  PetscCall(PetscSectionDestroy(&patch->gtolCountsWithArtificial));
  PetscCall(ISDestroy(&patch->gtolWithArtificial));
  PetscCall(ISDestroy(&patch->dofsWithArtificial));
  PetscCall(ISDestroy(&patch->offsWithArtificial));
  PetscCall(PetscSectionDestroy(&patch->gtolCountsWithAll));
  PetscCall(ISDestroy(&patch->gtolWithAll));
  PetscCall(ISDestroy(&patch->dofsWithAll));
  PetscCall(ISDestroy(&patch->offsWithAll));
  PetscCall(VecDestroy(&patch->cellMats));
  PetscCall(VecDestroy(&patch->intFacetMats));
  PetscCall(ISDestroy(&patch->allCells));
  PetscCall(ISDestroy(&patch->intFacets));
  PetscCall(ISDestroy(&patch->extFacets));
  PetscCall(ISDestroy(&patch->intFacetsToPatchCell));
  PetscCall(PetscSectionDestroy(&patch->intFacetCounts));
  PetscCall(PetscSectionDestroy(&patch->extFacetCounts));

  if (patch->dofSection)
    for (i = 0; i < patch->nsubspaces; i++) PetscCall(PetscSectionDestroy(&patch->dofSection[i]));
  PetscCall(PetscFree(patch->dofSection));
  PetscCall(PetscFree(patch->bs));
  PetscCall(PetscFree(patch->nodesPerCell));
  if (patch->cellNodeMap)
    for (i = 0; i < patch->nsubspaces; i++) PetscCall(PetscFree(patch->cellNodeMap[i]));
  PetscCall(PetscFree(patch->cellNodeMap));
  PetscCall(PetscFree(patch->subspaceOffsets));

  PetscCall((*patch->resetsolver)(pc));

  if (patch->subspaces_to_exclude) PetscCall(PetscHSetIDestroy(&patch->subspaces_to_exclude));

  PetscCall(VecDestroy(&patch->localRHS));
  PetscCall(VecDestroy(&patch->localUpdate));
  PetscCall(VecDestroy(&patch->patchRHS));
  PetscCall(VecDestroy(&patch->patchUpdate));
  PetscCall(VecDestroy(&patch->dof_weights));
  if (patch->patch_dof_weights) {
    for (i = 0; i < patch->npatch; ++i) PetscCall(VecDestroy(&patch->patch_dof_weights[i]));
    PetscCall(PetscFree(patch->patch_dof_weights));
  }
  if (patch->mat) {
    for (i = 0; i < patch->npatch; ++i) PetscCall(MatDestroy(&patch->mat[i]));
    PetscCall(PetscFree(patch->mat));
  }
  if (patch->matWithArtificial && !patch->isNonlinear) {
    for (i = 0; i < patch->npatch; ++i) PetscCall(MatDestroy(&patch->matWithArtificial[i]));
    PetscCall(PetscFree(patch->matWithArtificial));
  }
  PetscCall(VecDestroy(&patch->patchRHSWithArtificial));
  if (patch->dofMappingWithoutToWithArtificial) {
    for (i = 0; i < patch->npatch; ++i) PetscCall(ISDestroy(&patch->dofMappingWithoutToWithArtificial[i]));
    PetscCall(PetscFree(patch->dofMappingWithoutToWithArtificial));
  }
  if (patch->dofMappingWithoutToWithAll) {
    for (i = 0; i < patch->npatch; ++i) PetscCall(ISDestroy(&patch->dofMappingWithoutToWithAll[i]));
    PetscCall(PetscFree(patch->dofMappingWithoutToWithAll));
  }
  PetscCall(PetscFree(patch->sub_mat_type));
  if (patch->userIS) {
    for (i = 0; i < patch->npatch; ++i) PetscCall(ISDestroy(&patch->userIS[i]));
    PetscCall(PetscFree(patch->userIS));
  }
  PetscCall(PetscFree(patch->precomputedTensorLocations));
  PetscCall(PetscFree(patch->precomputedIntFacetTensorLocations));

  patch->bs          = NULL;
  patch->cellNodeMap = NULL;
  patch->nsubspaces  = 0;
  PetscCall(ISDestroy(&patch->iterationSet));

  PetscCall(PetscViewerDestroy(&patch->viewerCells));
  PetscCall(PetscViewerDestroy(&patch->viewerIntFacets));
  PetscCall(PetscViewerDestroy(&patch->viewerPoints));
  PetscCall(PetscViewerDestroy(&patch->viewerSection));
  PetscCall(PetscViewerDestroy(&patch->viewerMatrix));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCDestroy_PATCH_Linear(PC pc)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;
  PetscInt  i;

  PetscFunctionBegin;
  if (patch->solver) {
    for (i = 0; i < patch->npatch; ++i) PetscCall(KSPDestroy((KSP *)&patch->solver[i]));
    PetscCall(PetscFree(patch->solver));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCDestroy_PATCH(PC pc)
{
  PC_PATCH *patch = (PC_PATCH *)pc->data;

  PetscFunctionBegin;
  PetscCall(PCReset_PATCH(pc));
  PetscCall((*patch->destroysolver)(pc));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSetFromOptions_PATCH(PC pc, PetscOptionItems PetscOptionsObject)
{
  PC_PATCH            *patch                 = (PC_PATCH *)pc->data;
  PCPatchConstructType patchConstructionType = PC_PATCH_STAR;
  char                 sub_mat_type[PETSC_MAX_PATH_LEN];
  char                 option[PETSC_MAX_PATH_LEN];
  const char          *prefix;
  PetscBool            flg, dimflg, codimflg;
  MPI_Comm             comm;
  PetscInt            *ifields, nfields, k;
  PCCompositeType      loctype = PC_COMPOSITE_ADDITIVE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
  PetscCall(PetscObjectGetOptionsPrefix((PetscObject)pc, &prefix));
  PetscOptionsHeadBegin(PetscOptionsObject, "Patch solver options");

  PetscCall(PetscSNPrintf(option, PETSC_MAX_PATH_LEN, "-%s_patch_save_operators", patch->classname));
  PetscCall(PetscOptionsBool(option, "Store all patch operators for lifetime of object?", "PCPatchSetSaveOperators", patch->save_operators, &patch->save_operators, &flg));

  PetscCall(PetscSNPrintf(option, PETSC_MAX_PATH_LEN, "-%s_patch_precompute_element_tensors", patch->classname));
  PetscCall(PetscOptionsBool(option, "Compute each element tensor only once?", "PCPatchSetPrecomputeElementTensors", patch->precomputeElementTensors, &patch->precomputeElementTensors, &flg));
  PetscCall(PetscSNPrintf(option, PETSC_MAX_PATH_LEN, "-%s_patch_partition_of_unity", patch->classname));
  PetscCall(PetscOptionsBool(option, "Weight contributions by dof multiplicity?", "PCPatchSetPartitionOfUnity", patch->partition_of_unity, &patch->partition_of_unity, &flg));

  PetscCall(PetscSNPrintf(option, PETSC_MAX_PATH_LEN, "-%s_patch_local_type", patch->classname));
  PetscCall(PetscOptionsEnum(option, "Type of local solver composition (additive or multiplicative)", "PCPatchSetLocalComposition", PCCompositeTypes, (PetscEnum)loctype, (PetscEnum *)&loctype, &flg));
  if (flg) PetscCall(PCPatchSetLocalComposition(pc, loctype));
  PetscCall(PetscSNPrintf(option, PETSC_MAX_PATH_LEN, "-%s_patch_dense_inverse", patch->classname));
  PetscCall(PetscOptionsBool(option, "Compute inverses of patch matrices and apply directly? Ignores KSP/PC settings on patch.", "PCPatchSetDenseInverse", patch->denseinverse, &patch->denseinverse, &flg));
  PetscCall(PetscSNPrintf(option, PETSC_MAX_PATH_LEN, "-%s_patch_construct_dim", patch->classname));
  PetscCall(PetscOptionsInt(option, "What dimension of mesh point to construct patches by? (0 = vertices)", "PCPATCH", patch->dim, &patch->dim, &dimflg));
  PetscCall(PetscSNPrintf(option, PETSC_MAX_PATH_LEN, "-%s_patch_construct_codim", patch->classname));
  PetscCall(PetscOptionsInt(option, "What co-dimension of mesh point to construct patches by? (0 = cells)", "PCPATCH", patch->codim, &patch->codim, &codimflg));
  PetscCheck(!dimflg || !codimflg, comm, PETSC_ERR_ARG_WRONG, "Can only set one of dimension or co-dimension");

  PetscCall(PetscSNPrintf(option, PETSC_MAX_PATH_LEN, "-%s_patch_construct_type", patch->classname));
  PetscCall(PetscOptionsEnum(option, "How should the patches be constructed?", "PCPatchSetConstructType", PCPatchConstructTypes, (PetscEnum)patchConstructionType, (PetscEnum *)&patchConstructionType, &flg));
  if (flg) PetscCall(PCPatchSetConstructType(pc, patchConstructionType, NULL, NULL));

  PetscCall(PetscSNPrintf(option, PETSC_MAX_PATH_LEN, "-%s_patch_vanka_dim", patch->classname));
  PetscCall(PetscOptionsInt(option, "Topological dimension of entities for Vanka to ignore", "PCPATCH", patch->vankadim, &patch->vankadim, &flg));

  PetscCall(PetscSNPrintf(option, PETSC_MAX_PATH_LEN, "-%s_patch_ignore_dim", patch->classname));
  PetscCall(PetscOptionsInt(option, "Topological dimension of entities for completion to ignore", "PCPATCH", patch->ignoredim, &patch->ignoredim, &flg));

  PetscCall(PetscSNPrintf(option, PETSC_MAX_PATH_LEN, "-%s_patch_pardecomp_overlap", patch->classname));
  PetscCall(PetscOptionsInt(option, "What overlap should we use in construct type pardecomp?", "PCPATCH", patch->pardecomp_overlap, &patch->pardecomp_overlap, &flg));

  PetscCall(PetscSNPrintf(option, PETSC_MAX_PATH_LEN, "-%s_patch_sub_mat_type", patch->classname));
  PetscCall(PetscOptionsFList(option, "Matrix type for patch solves", "PCPatchSetSubMatType", MatList, NULL, sub_mat_type, PETSC_MAX_PATH_LEN, &flg));
  if (flg) PetscCall(PCPatchSetSubMatType(pc, sub_mat_type));

  PetscCall(PetscSNPrintf(option, PETSC_MAX_PATH_LEN, "-%s_patch_symmetrise_sweep", patch->classname));
  PetscCall(PetscOptionsBool(option, "Go start->end, end->start?", "PCPATCH", patch->symmetrise_sweep, &patch->symmetrise_sweep, &flg));

  /* If the user has set the number of subspaces, use that for the buffer size,
   otherwise use a large number */
  if (patch->nsubspaces <= 0) {
    nfields = 128;
  } else {
    nfields = patch->nsubspaces;
  }
  PetscCall(PetscMalloc1(nfields, &ifields));
  PetscCall(PetscSNPrintf(option, PETSC_MAX_PATH_LEN, "-%s_patch_exclude_subspaces", patch->classname));
  PetscCall(PetscOptionsGetIntArray(((PetscObject)pc)->options, ((PetscObject)pc)->prefix, option, ifields, &nfields, &flg));
  PetscCheck(!flg || !(patchConstructionType == PC_PATCH_USER), comm, PETSC_ERR_ARG_INCOMP, "We cannot support excluding a subspace with user patches because we do not index patches with a mesh point");
  if (flg) {
    PetscCall(PetscHSetIClear(patch->subspaces_to_exclude));
    for (k = 0; k < nfields; k++) PetscCall(PetscHSetIAdd(patch->subspaces_to_exclude, ifields[k]));
  }
  PetscCall(PetscFree(ifields));

  PetscCall(PetscSNPrintf(option, PETSC_MAX_PATH_LEN, "-%s_patch_patches_view", patch->classname));
  PetscCall(PetscOptionsBool(option, "Print out information during patch construction", "PCPATCH", patch->viewPatches, &patch->viewPatches, &flg));
  PetscCall(PetscSNPrintf(option, PETSC_MAX_PATH_LEN, "-%s_patch_cells_view", patch->classname));
  PetscCall(PetscOptionsCreateViewer(comm, ((PetscObject)pc)->options, prefix, option, &patch->viewerCells, &patch->formatCells, &patch->viewCells));
  PetscCall(PetscSNPrintf(option, PETSC_MAX_PATH_LEN, "-%s_patch_interior_facets_view", patch->classname));
  PetscCall(PetscOptionsCreateViewer(comm, ((PetscObject)pc)->options, prefix, option, &patch->viewerIntFacets, &patch->formatIntFacets, &patch->viewIntFacets));
  PetscCall(PetscSNPrintf(option, PETSC_MAX_PATH_LEN, "-%s_patch_exterior_facets_view", patch->classname));
  PetscCall(PetscOptionsCreateViewer(comm, ((PetscObject)pc)->options, prefix, option, &patch->viewerExtFacets, &patch->formatExtFacets, &patch->viewExtFacets));
  PetscCall(PetscSNPrintf(option, PETSC_MAX_PATH_LEN, "-%s_patch_points_view", patch->classname));
  PetscCall(PetscOptionsCreateViewer(comm, ((PetscObject)pc)->options, prefix, option, &patch->viewerPoints, &patch->formatPoints, &patch->viewPoints));
  PetscCall(PetscSNPrintf(option, PETSC_MAX_PATH_LEN, "-%s_patch_section_view", patch->classname));
  PetscCall(PetscOptionsCreateViewer(comm, ((PetscObject)pc)->options, prefix, option, &patch->viewerSection, &patch->formatSection, &patch->viewSection));
  PetscCall(PetscSNPrintf(option, PETSC_MAX_PATH_LEN, "-%s_patch_mat_view", patch->classname));
  PetscCall(PetscOptionsCreateViewer(comm, ((PetscObject)pc)->options, prefix, option, &patch->viewerMatrix, &patch->formatMatrix, &patch->viewMatrix));
  PetscOptionsHeadEnd();
  patch->optionsSet = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSetUpOnBlocks_PATCH(PC pc)
{
  PC_PATCH          *patch = (PC_PATCH *)pc->data;
  KSPConvergedReason reason;
  PetscInt           i;

  PetscFunctionBegin;
  if (!patch->save_operators) {
    /* Can't do this here because the sub KSPs don't have an operator attached yet. */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (patch->denseinverse) {
    /* No solvers */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  for (i = 0; i < patch->npatch; ++i) {
    if (!((KSP)patch->solver[i])->setfromoptionscalled) PetscCall(KSPSetFromOptions((KSP)patch->solver[i]));
    PetscCall(KSPSetUp((KSP)patch->solver[i]));
    PetscCall(KSPGetConvergedReason((KSP)patch->solver[i], &reason));
    if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCView_PATCH(PC pc, PetscViewer viewer)
{
  PC_PATCH   *patch = (PC_PATCH *)pc->data;
  PetscViewer sviewer;
  PetscBool   isascii;
  PetscMPIInt rank;

  PetscFunctionBegin;
  /* TODO Redo tabbing with set tbas in new style */
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  if (!isascii) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
  PetscCall(PetscViewerASCIIPushTab(viewer));
  PetscCall(PetscViewerASCIIPrintf(viewer, "Subspace Correction preconditioner with %" PetscInt_FMT " patches\n", patch->npatch));
  if (patch->local_composition_type == PC_COMPOSITE_MULTIPLICATIVE) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "Schwarz type: multiplicative\n"));
  } else {
    PetscCall(PetscViewerASCIIPrintf(viewer, "Schwarz type: additive\n"));
  }
  if (patch->partition_of_unity) PetscCall(PetscViewerASCIIPrintf(viewer, "Weighting by partition of unity\n"));
  else PetscCall(PetscViewerASCIIPrintf(viewer, "Not weighting by partition of unity\n"));
  if (patch->symmetrise_sweep) PetscCall(PetscViewerASCIIPrintf(viewer, "Symmetrising sweep (start->end, then end->start)\n"));
  else PetscCall(PetscViewerASCIIPrintf(viewer, "Not symmetrising sweep\n"));
  if (!patch->precomputeElementTensors) PetscCall(PetscViewerASCIIPrintf(viewer, "Not precomputing element tensors (overlapping cells rebuilt in every patch assembly)\n"));
  else PetscCall(PetscViewerASCIIPrintf(viewer, "Precomputing element tensors (each cell assembled only once)\n"));
  if (!patch->save_operators) PetscCall(PetscViewerASCIIPrintf(viewer, "Not saving patch operators (rebuilt every PCApply)\n"));
  else PetscCall(PetscViewerASCIIPrintf(viewer, "Saving patch operators (rebuilt every PCSetUp)\n"));
  if (patch->patchconstructop == PCPatchConstruct_Star) PetscCall(PetscViewerASCIIPrintf(viewer, "Patch construction operator: star\n"));
  else if (patch->patchconstructop == PCPatchConstruct_Vanka) PetscCall(PetscViewerASCIIPrintf(viewer, "Patch construction operator: Vanka\n"));
  else if (patch->patchconstructop == PCPatchConstruct_User) PetscCall(PetscViewerASCIIPrintf(viewer, "Patch construction operator: user-specified\n"));
  else PetscCall(PetscViewerASCIIPrintf(viewer, "Patch construction operator: unknown\n"));

  if (patch->denseinverse) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "Explicitly forming dense inverse and applying patch solver via MatMult.\n"));
  } else {
    if (patch->isNonlinear) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "SNES on patches (all same):\n"));
    } else {
      PetscCall(PetscViewerASCIIPrintf(viewer, "KSP on patches (all same):\n"));
    }
    if (patch->solver) {
      PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
      if (rank == 0) {
        PetscCall(PetscViewerASCIIPushTab(sviewer));
        PetscCall(PetscObjectView(patch->solver[0], sviewer));
        PetscCall(PetscViewerASCIIPopTab(sviewer));
      }
      PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    } else {
      PetscCall(PetscViewerASCIIPushTab(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Solver not yet set.\n"));
      PetscCall(PetscViewerASCIIPopTab(viewer));
    }
  }
  PetscCall(PetscViewerASCIIPopTab(viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   PCPATCH - A `PC` object that encapsulates flexible definition of blocks for overlapping and non-overlapping
   small block additive preconditioners. Block definition is based on topology from
   a `DM` and equation numbering from a `PetscSection`.

   Options Database Keys:
+ -pc_patch_cells_view   - Views the process local cell numbers for each patch
. -pc_patch_points_view  - Views the process local mesh point numbers for each patch
. -pc_patch_g2l_view     - Views the map between global dofs and patch local dofs for each patch
. -pc_patch_patches_view - Views the global dofs associated with each patch and its boundary
- -pc_patch_sub_mat_view - Views the matrix associated with each patch

   Level: intermediate

.seealso: [](ch_ksp), `PCType`, `PCCreate()`, `PCSetType()`, `PCASM`, `PCJACOBI`, `PCPBJACOBI`, `PCVPBJACOBI`, `SNESPATCH`
M*/
PETSC_EXTERN PetscErrorCode PCCreate_Patch(PC pc)
{
  PC_PATCH *patch;

  PetscFunctionBegin;
  PetscCall(PetscCitationsRegister(PCPatchCitation, &PCPatchcite));
  PetscCall(PetscNew(&patch));

  if (patch->subspaces_to_exclude) PetscCall(PetscHSetIDestroy(&patch->subspaces_to_exclude));
  PetscCall(PetscHSetICreate(&patch->subspaces_to_exclude));

  patch->classname   = "pc";
  patch->isNonlinear = PETSC_FALSE;

  /* Set some defaults */
  patch->combined                 = PETSC_FALSE;
  patch->save_operators           = PETSC_TRUE;
  patch->local_composition_type   = PC_COMPOSITE_ADDITIVE;
  patch->precomputeElementTensors = PETSC_FALSE;
  patch->partition_of_unity       = PETSC_FALSE;
  patch->codim                    = -1;
  patch->dim                      = -1;
  patch->vankadim                 = -1;
  patch->ignoredim                = -1;
  patch->pardecomp_overlap        = 0;
  patch->patchconstructop         = PCPatchConstruct_Star;
  patch->symmetrise_sweep         = PETSC_FALSE;
  patch->npatch                   = 0;
  patch->userIS                   = NULL;
  patch->optionsSet               = PETSC_FALSE;
  patch->iterationSet             = NULL;
  patch->user_patches             = PETSC_FALSE;
  PetscCall(PetscStrallocpy(MATDENSE, (char **)&patch->sub_mat_type));
  patch->viewPatches                       = PETSC_FALSE;
  patch->viewCells                         = PETSC_FALSE;
  patch->viewPoints                        = PETSC_FALSE;
  patch->viewSection                       = PETSC_FALSE;
  patch->viewMatrix                        = PETSC_FALSE;
  patch->densesolve                        = NULL;
  patch->setupsolver                       = PCSetUp_PATCH_Linear;
  patch->applysolver                       = PCApply_PATCH_Linear;
  patch->resetsolver                       = PCReset_PATCH_Linear;
  patch->destroysolver                     = PCDestroy_PATCH_Linear;
  patch->updatemultiplicative              = PCUpdateMultiplicative_PATCH_Linear;
  patch->dofMappingWithoutToWithArtificial = NULL;
  patch->dofMappingWithoutToWithAll        = NULL;

  pc->data                 = (void *)patch;
  pc->ops->apply           = PCApply_PATCH;
  pc->ops->applytranspose  = NULL; /* PCApplyTranspose_PATCH; */
  pc->ops->setup           = PCSetUp_PATCH;
  pc->ops->reset           = PCReset_PATCH;
  pc->ops->destroy         = PCDestroy_PATCH;
  pc->ops->setfromoptions  = PCSetFromOptions_PATCH;
  pc->ops->setuponblocks   = PCSetUpOnBlocks_PATCH;
  pc->ops->view            = PCView_PATCH;
  pc->ops->applyrichardson = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
