static char help[] = "Copy of ex5.c\n";

/* ------------------------------------------------------------------------

  Copy of ex5.c.
  Once PETSc test harness supports conditional linking, we can remove this duplicate.
  See https://gitlab.com/petsc/petsc/-/issues/1173
  ------------------------------------------------------------------------- */

/*
   Include "petscdmda.h" so that we can use distributed arrays (DMDAs).
   Include "petscsnes.h" so that we can use SNES solvers.  Note that this
*/
#include <petscdm.h>
#include <petscdmda.h>
#include <petscsnes.h>
#include <petscmatlab.h>
#include <petsc/private/snesimpl.h> /* For SNES_Solve event */
#include "ex55.h"

/* ------------------------------------------------------------------- */
/*
   FormInitialGuess - Forms initial approximation.

   Input Parameters:
   da - The DM
   user - user-defined application context

   Output Parameter:
   X - vector
 */
static PetscErrorCode FormInitialGuess(DM da, AppCtx *user, Vec X)
{
  PetscInt      i, j, Mx, My, xs, ys, xm, ym;
  PetscReal     lambda, temp1, temp, hx, hy;
  PetscScalar **x;

  PetscFunctionBeginUser;
  PetscCall(DMDAGetInfo(da, PETSC_IGNORE, &Mx, &My, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE));

  lambda = user->param;
  hx     = 1.0 / (PetscReal)(Mx - 1);
  hy     = 1.0 / (PetscReal)(My - 1);
  temp1  = lambda / (lambda + 1.0);

  /*
     Get a pointer to vector data.
       - For default PETSc vectors, VecGetArray() returns a pointer to
         the data array.  Otherwise, the routine is implementation dependent.
       - You MUST call VecRestoreArray() when you no longer need access to
         the array.
  */
  PetscCall(DMDAVecGetArray(da, X, &x));

  /*
     Get local grid boundaries (for 2-dimensional DMDA):
       xs, ys   - starting grid indices (no ghost points)
       xm, ym   - widths of local grid (no ghost points)

  */
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &xm, &ym, NULL));

  /*
     Compute initial guess over the locally owned part of the grid
  */
  for (j = ys; j < ys + ym; j++) {
    temp = (PetscReal)(PetscMin(j, My - j - 1)) * hy;
    for (i = xs; i < xs + xm; i++) {
      if (i == 0 || j == 0 || i == Mx - 1 || j == My - 1) {
        /* boundary conditions are all zero Dirichlet */
        x[j][i] = 0.0;
      } else {
        x[j][i] = temp1 * PetscSqrtReal(PetscMin((PetscReal)(PetscMin(i, Mx - i - 1)) * hx, temp));
      }
    }
  }

  /*
     Restore vector
  */
  PetscCall(DMDAVecRestoreArray(da, X, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  FormExactSolution - Forms MMS solution

  Input Parameters:
  da - The DM
  user - user-defined application context

  Output Parameter:
  X - vector
 */
static PetscErrorCode FormExactSolution(DM da, AppCtx *user, Vec U)
{
  DM            coordDA;
  Vec           coordinates;
  DMDACoor2d  **coords;
  PetscScalar **u;
  PetscInt      xs, ys, xm, ym, i, j;

  PetscFunctionBeginUser;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &xm, &ym, NULL));
  PetscCall(DMGetCoordinateDM(da, &coordDA));
  PetscCall(DMGetCoordinates(da, &coordinates));
  PetscCall(DMDAVecGetArray(coordDA, coordinates, &coords));
  PetscCall(DMDAVecGetArray(da, U, &u));
  for (j = ys; j < ys + ym; ++j) {
    for (i = xs; i < xs + xm; ++i) PetscCall(user->mms_solution(user, &coords[j][i], &u[j][i]));
  }
  PetscCall(DMDAVecRestoreArray(da, U, &u));
  PetscCall(DMDAVecRestoreArray(coordDA, coordinates, &coords));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode ZeroBCSolution(AppCtx *user, const DMDACoor2d *c, PetscScalar *u)
{
  u[0] = 0.;
  return PETSC_SUCCESS;
}

/* The functions below evaluate the MMS solution u(x,y) and associated forcing

     f(x,y) = -u_xx - u_yy - lambda exp(u)

  such that u(x,y) is an exact solution with f(x,y) as the right-hand side forcing term.
 */
static PetscErrorCode MMSSolution1(AppCtx *user, const DMDACoor2d *c, PetscScalar *u)
{
  PetscReal x = PetscRealPart(c->x), y = PetscRealPart(c->y);
  u[0] = x * (1 - x) * y * (1 - y);
  PetscCall(PetscLogFlops(5));
  return PETSC_SUCCESS;
}
static PetscErrorCode MMSForcing1(AppCtx *user, const DMDACoor2d *c, PetscScalar *f)
{
  PetscReal x = PetscRealPart(c->x), y = PetscRealPart(c->y);
  f[0] = 2 * x * (1 - x) + 2 * y * (1 - y) - user->param * PetscExpReal(x * (1 - x) * y * (1 - y));
  return PETSC_SUCCESS;
}

static PetscErrorCode MMSSolution2(AppCtx *user, const DMDACoor2d *c, PetscScalar *u)
{
  PetscReal x = PetscRealPart(c->x), y = PetscRealPart(c->y);
  u[0] = PetscSinReal(PETSC_PI * x) * PetscSinReal(PETSC_PI * y);
  PetscCall(PetscLogFlops(5));
  return PETSC_SUCCESS;
}
static PetscErrorCode MMSForcing2(AppCtx *user, const DMDACoor2d *c, PetscScalar *f)
{
  PetscReal x = PetscRealPart(c->x), y = PetscRealPart(c->y);
  f[0] = 2 * PetscSqr(PETSC_PI) * PetscSinReal(PETSC_PI * x) * PetscSinReal(PETSC_PI * y) - user->param * PetscExpReal(PetscSinReal(PETSC_PI * x) * PetscSinReal(PETSC_PI * y));
  return PETSC_SUCCESS;
}

static PetscErrorCode MMSSolution3(AppCtx *user, const DMDACoor2d *c, PetscScalar *u)
{
  PetscReal x = PetscRealPart(c->x), y = PetscRealPart(c->y);
  u[0] = PetscSinReal(user->m * PETSC_PI * x * (1 - y)) * PetscSinReal(user->n * PETSC_PI * y * (1 - x));
  PetscCall(PetscLogFlops(5));
  return PETSC_SUCCESS;
}
static PetscErrorCode MMSForcing3(AppCtx *user, const DMDACoor2d *c, PetscScalar *f)
{
  PetscReal x = PetscRealPart(c->x), y = PetscRealPart(c->y);
  PetscReal m = user->m, n = user->n, lambda = user->param;
  f[0] = (-(PetscExpReal(PetscSinReal(m * PETSC_PI * x * (1 - y)) * PetscSinReal(n * PETSC_PI * (1 - x) * y)) * lambda) + PetscSqr(PETSC_PI) * (-2 * m * n * ((-1 + x) * x + (-1 + y) * y) * PetscCosReal(m * PETSC_PI * x * (-1 + y)) * PetscCosReal(n * PETSC_PI * (-1 + x) * y) + (PetscSqr(m) * (PetscSqr(x) + PetscSqr(-1 + y)) + PetscSqr(n) * (PetscSqr(-1 + x) + PetscSqr(y))) * PetscSinReal(m * PETSC_PI * x * (-1 + y)) * PetscSinReal(n * PETSC_PI * (-1 + x) * y)));
  return PETSC_SUCCESS;
}

static PetscErrorCode MMSSolution4(AppCtx *user, const DMDACoor2d *c, PetscScalar *u)
{
  const PetscReal Lx = 1., Ly = 1.;
  PetscReal       x = PetscRealPart(c->x), y = PetscRealPart(c->y);
  u[0] = (PetscPowReal(x, 4) - PetscSqr(Lx) * PetscSqr(x)) * (PetscPowReal(y, 4) - PetscSqr(Ly) * PetscSqr(y));
  PetscCall(PetscLogFlops(9));
  return PETSC_SUCCESS;
}
static PetscErrorCode MMSForcing4(AppCtx *user, const DMDACoor2d *c, PetscScalar *f)
{
  const PetscReal Lx = 1., Ly = 1.;
  PetscReal       x = PetscRealPart(c->x), y = PetscRealPart(c->y);
  f[0] = (2 * PetscSqr(x) * (PetscSqr(x) - PetscSqr(Lx)) * (PetscSqr(Ly) - 6 * PetscSqr(y)) + 2 * PetscSqr(y) * (PetscSqr(Lx) - 6 * PetscSqr(x)) * (PetscSqr(y) - PetscSqr(Ly)) - user->param * PetscExpReal((PetscPowReal(x, 4) - PetscSqr(Lx) * PetscSqr(x)) * (PetscPowReal(y, 4) - PetscSqr(Ly) * PetscSqr(y))));
  return PETSC_SUCCESS;
}

/* ------------------------------------------------------------------- */
/*
   FormFunctionLocal - Evaluates nonlinear function, F(x) on local process patch

 */
static PetscErrorCode FormFunctionLocal(DMDALocalInfo *info, PetscScalar **x, PetscScalar **f, AppCtx *user)
{
  PetscInt    i, j;
  PetscReal   lambda, hx, hy, hxdhy, hydhx;
  PetscScalar u, ue, uw, un, us, uxx, uyy, mms_solution, mms_forcing;
  DMDACoor2d  c;

  PetscFunctionBeginUser;
  lambda = user->param;
  hx     = 1.0 / (PetscReal)(info->mx - 1);
  hy     = 1.0 / (PetscReal)(info->my - 1);
  hxdhy  = hx / hy;
  hydhx  = hy / hx;
  /*
     Compute function over the locally owned part of the grid
  */
  for (j = info->ys; j < info->ys + info->ym; j++) {
    for (i = info->xs; i < info->xs + info->xm; i++) {
      if (i == 0 || j == 0 || i == info->mx - 1 || j == info->my - 1) {
        c.x = i * hx;
        c.y = j * hy;
        PetscCall(user->mms_solution(user, &c, &mms_solution));
        f[j][i] = 2.0 * (hydhx + hxdhy) * (x[j][i] - mms_solution);
      } else {
        u  = x[j][i];
        uw = x[j][i - 1];
        ue = x[j][i + 1];
        un = x[j - 1][i];
        us = x[j + 1][i];

        /* Enforce boundary conditions at neighboring points -- setting these values causes the Jacobian to be symmetric. */
        if (i - 1 == 0) {
          c.x = (i - 1) * hx;
          c.y = j * hy;
          PetscCall(user->mms_solution(user, &c, &uw));
        }
        if (i + 1 == info->mx - 1) {
          c.x = (i + 1) * hx;
          c.y = j * hy;
          PetscCall(user->mms_solution(user, &c, &ue));
        }
        if (j - 1 == 0) {
          c.x = i * hx;
          c.y = (j - 1) * hy;
          PetscCall(user->mms_solution(user, &c, &un));
        }
        if (j + 1 == info->my - 1) {
          c.x = i * hx;
          c.y = (j + 1) * hy;
          PetscCall(user->mms_solution(user, &c, &us));
        }

        uxx         = (2.0 * u - uw - ue) * hydhx;
        uyy         = (2.0 * u - un - us) * hxdhy;
        mms_forcing = 0;
        c.x         = i * hx;
        c.y         = j * hy;
        if (user->mms_forcing) PetscCall(user->mms_forcing(user, &c, &mms_forcing));
        f[j][i] = uxx + uyy - hx * hy * (lambda * PetscExpScalar(u) + mms_forcing);
      }
    }
  }
  PetscCall(PetscLogFlops(11.0 * info->ym * info->xm));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* FormObjectiveLocal - Evaluates nonlinear function, F(x) on local process patch */
static PetscErrorCode FormObjectiveLocal(DMDALocalInfo *info, PetscScalar **x, PetscReal *obj, AppCtx *user)
{
  PetscInt    i, j;
  PetscReal   lambda, hx, hy, hxdhy, hydhx, sc, lobj = 0;
  PetscScalar u, ue, uw, un, us, uxux, uyuy;
  MPI_Comm    comm;

  PetscFunctionBeginUser;
  *obj = 0;
  PetscCall(PetscObjectGetComm((PetscObject)info->da, &comm));
  lambda = user->param;
  hx     = 1.0 / (PetscReal)(info->mx - 1);
  hy     = 1.0 / (PetscReal)(info->my - 1);
  sc     = hx * hy * lambda;
  hxdhy  = hx / hy;
  hydhx  = hy / hx;
  /*
     Compute function over the locally owned part of the grid
  */
  for (j = info->ys; j < info->ys + info->ym; j++) {
    for (i = info->xs; i < info->xs + info->xm; i++) {
      if (i == 0 || j == 0 || i == info->mx - 1 || j == info->my - 1) {
        lobj += PetscRealPart((hydhx + hxdhy) * x[j][i] * x[j][i]);
      } else {
        u  = x[j][i];
        uw = x[j][i - 1];
        ue = x[j][i + 1];
        un = x[j - 1][i];
        us = x[j + 1][i];

        if (i - 1 == 0) uw = 0.;
        if (i + 1 == info->mx - 1) ue = 0.;
        if (j - 1 == 0) un = 0.;
        if (j + 1 == info->my - 1) us = 0.;

        /* F[u] = 1/2\int_{\omega}\nabla^2u(x)*u(x)*dx */

        uxux = u * (2. * u - ue - uw) * hydhx;
        uyuy = u * (2. * u - un - us) * hxdhy;

        lobj += PetscRealPart(0.5 * (uxux + uyuy) - sc * PetscExpScalar(u));
      }
    }
  }
  PetscCall(PetscLogFlops(12.0 * info->ym * info->xm));
  *obj = lobj;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   FormJacobianLocal - Evaluates Jacobian matrix on local process patch
*/
static PetscErrorCode FormJacobianLocal(DMDALocalInfo *info, PetscScalar **x, Mat jac, Mat jacpre, AppCtx *user)
{
  PetscInt     i, j, k;
  MatStencil   col[5], row;
  PetscScalar  lambda, v[5], hx, hy, hxdhy, hydhx, sc;
  DM           coordDA;
  Vec          coordinates;
  DMDACoor2d **coords;

  PetscFunctionBeginUser;
  lambda = user->param;
  /* Extract coordinates */
  PetscCall(DMGetCoordinateDM(info->da, &coordDA));
  PetscCall(DMGetCoordinates(info->da, &coordinates));
  PetscCall(DMDAVecGetArray(coordDA, coordinates, &coords));
  hx = info->xm > 1 ? PetscRealPart(coords[info->ys][info->xs + 1].x) - PetscRealPart(coords[info->ys][info->xs].x) : 1.0;
  hy = info->ym > 1 ? PetscRealPart(coords[info->ys + 1][info->xs].y) - PetscRealPart(coords[info->ys][info->xs].y) : 1.0;
  PetscCall(DMDAVecRestoreArray(coordDA, coordinates, &coords));
  hxdhy = hx / hy;
  hydhx = hy / hx;
  sc    = hx * hy * lambda;

  /*
     Compute entries for the locally owned part of the Jacobian.
      - Currently, all PETSc parallel matrix formats are partitioned by
        contiguous chunks of rows across the processors.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local elements will be sent to the
        appropriate processor during matrix assembly).
      - Here, we set all entries for a particular row at once.
      - We can set matrix entries either using either
        MatSetValuesLocal() or MatSetValues(), as discussed above.
  */
  for (j = info->ys; j < info->ys + info->ym; j++) {
    for (i = info->xs; i < info->xs + info->xm; i++) {
      row.j = j;
      row.i = i;
      /* boundary points */
      if (i == 0 || j == 0 || i == info->mx - 1 || j == info->my - 1) {
        v[0] = 2.0 * (hydhx + hxdhy);
        PetscCall(MatSetValuesStencil(jacpre, 1, &row, 1, &row, v, INSERT_VALUES));
      } else {
        k = 0;
        /* interior grid points */
        if (j - 1 != 0) {
          v[k]     = -hxdhy;
          col[k].j = j - 1;
          col[k].i = i;
          k++;
        }
        if (i - 1 != 0) {
          v[k]     = -hydhx;
          col[k].j = j;
          col[k].i = i - 1;
          k++;
        }

        v[k]     = 2.0 * (hydhx + hxdhy) - sc * PetscExpScalar(x[j][i]);
        col[k].j = row.j;
        col[k].i = row.i;
        k++;

        if (i + 1 != info->mx - 1) {
          v[k]     = -hydhx;
          col[k].j = j;
          col[k].i = i + 1;
          k++;
        }
        if (j + 1 != info->mx - 1) {
          v[k]     = -hxdhy;
          col[k].j = j + 1;
          col[k].i = i;
          k++;
        }
        PetscCall(MatSetValuesStencil(jacpre, 1, &row, k, col, v, INSERT_VALUES));
      }
    }
  }

  /*
     Assemble matrix, using the 2-step process:
       MatAssemblyBegin(), MatAssemblyEnd().
  */
  PetscCall(MatAssemblyBegin(jacpre, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(jacpre, MAT_FINAL_ASSEMBLY));

  /*
     Tell the matrix we will never add a new nonzero location to the
     matrix. If we do, it will generate an error.
  */
  PetscCall(MatSetOption(jac, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode FormFunctionMatlab(SNES snes, Vec X, Vec F, void *ptr)
{
#if PetscDefined(HAVE_MATLAB)
  AppCtx   *user = (AppCtx *)ptr;
  PetscInt  Mx, My;
  PetscReal lambda, hx, hy;
  Vec       localX, localF;
  MPI_Comm  comm;
  DM        da;

  PetscFunctionBeginUser;
  PetscCall(SNESGetDM(snes, &da));
  PetscCall(DMGetLocalVector(da, &localX));
  PetscCall(DMGetLocalVector(da, &localF));
  PetscCall(PetscObjectSetName((PetscObject)localX, "localX"));
  PetscCall(PetscObjectSetName((PetscObject)localF, "localF"));
  PetscCall(DMDAGetInfo(da, PETSC_IGNORE, &Mx, &My, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE));

  lambda = user->param;
  hx     = 1.0 / (PetscReal)(Mx - 1);
  hy     = 1.0 / (PetscReal)(My - 1);

  PetscCall(PetscObjectGetComm((PetscObject)snes, &comm));
  /*
     Scatter ghost points to local vector,using the 2-step process
        DMGlobalToLocalBegin(),DMGlobalToLocalEnd().
     By placing code between these two statements, computations can be
     done while messages are in transition.
  */
  PetscCall(DMGlobalToLocalBegin(da, X, INSERT_VALUES, localX));
  PetscCall(DMGlobalToLocalEnd(da, X, INSERT_VALUES, localX));
  PetscCall(PetscMatlabEnginePut(PETSC_MATLAB_ENGINE_(comm), (PetscObject)localX));
  PetscCall(PetscMatlabEngineEvaluate(PETSC_MATLAB_ENGINE_(comm), "localF=ex5m(localX,%18.16e,%18.16e,%18.16e)", (double)hx, (double)hy, (double)lambda));
  PetscCall(PetscMatlabEngineGet(PETSC_MATLAB_ENGINE_(comm), (PetscObject)localF));

  /*
     Insert values into global vector
  */
  PetscCall(DMLocalToGlobalBegin(da, localF, INSERT_VALUES, F));
  PetscCall(DMLocalToGlobalEnd(da, localF, INSERT_VALUES, F));
  PetscCall(DMRestoreLocalVector(da, &localX));
  PetscCall(DMRestoreLocalVector(da, &localF));
  PetscFunctionReturn(PETSC_SUCCESS);
#else
  return PETSC_SUCCESS; /* Never called */
#endif
}

/* ------------------------------------------------------------------- */
/*
      Applies some sweeps on nonlinear Gauss-Seidel on each process

 */
static PetscErrorCode NonlinearGS(SNES snes, Vec X, Vec B, PetscCtx ctx)
{
  PetscInt      i, j, k, Mx, My, xs, ys, xm, ym, its, tot_its, sweeps, l;
  PetscReal     lambda, hx, hy, hxdhy, hydhx, sc;
  PetscScalar **x, **b, bij, F, F0 = 0, J, u, un, us, ue, eu, uw, uxx, uyy, y;
  PetscReal     atol, rtol, stol;
  DM            da;
  AppCtx       *user;
  Vec           localX, localB;

  PetscFunctionBeginUser;
  tot_its = 0;
  PetscCall(SNESNGSGetSweeps(snes, &sweeps));
  PetscCall(SNESNGSGetTolerances(snes, &atol, &rtol, &stol, &its));
  PetscCall(SNESGetDM(snes, &da));
  PetscCall(DMGetApplicationContext(da, &user));

  PetscCall(DMDAGetInfo(da, PETSC_IGNORE, &Mx, &My, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE));

  lambda = user->param;
  hx     = 1.0 / (PetscReal)(Mx - 1);
  hy     = 1.0 / (PetscReal)(My - 1);
  sc     = hx * hy * lambda;
  hxdhy  = hx / hy;
  hydhx  = hy / hx;

  PetscCall(DMGetLocalVector(da, &localX));
  if (B) PetscCall(DMGetLocalVector(da, &localB));
  for (l = 0; l < sweeps; l++) {
    PetscCall(DMGlobalToLocalBegin(da, X, INSERT_VALUES, localX));
    PetscCall(DMGlobalToLocalEnd(da, X, INSERT_VALUES, localX));
    if (B) {
      PetscCall(DMGlobalToLocalBegin(da, B, INSERT_VALUES, localB));
      PetscCall(DMGlobalToLocalEnd(da, B, INSERT_VALUES, localB));
    }
    /*
     Get a pointer to vector data.
     - For default PETSc vectors, VecGetArray() returns a pointer to
     the data array.  Otherwise, the routine is implementation dependent.
     - You MUST call VecRestoreArray() when you no longer need access to
     the array.
     */
    PetscCall(DMDAVecGetArray(da, localX, &x));
    if (B) PetscCall(DMDAVecGetArray(da, localB, &b));
    /*
     Get local grid boundaries (for 2-dimensional DMDA):
     xs, ys   - starting grid indices (no ghost points)
     xm, ym   - widths of local grid (no ghost points)
     */
    PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &xm, &ym, NULL));

    for (j = ys; j < ys + ym; j++) {
      for (i = xs; i < xs + xm; i++) {
        if (i == 0 || j == 0 || i == Mx - 1 || j == My - 1) {
          /* boundary conditions are all zero Dirichlet */
          x[j][i] = 0.0;
        } else {
          if (B) bij = b[j][i];
          else bij = 0.;

          u  = x[j][i];
          un = x[j - 1][i];
          us = x[j + 1][i];
          ue = x[j][i - 1];
          uw = x[j][i + 1];

          for (k = 0; k < its; k++) {
            eu  = PetscExpScalar(u);
            uxx = (2.0 * u - ue - uw) * hydhx;
            uyy = (2.0 * u - un - us) * hxdhy;
            F   = uxx + uyy - sc * eu - bij;
            if (k == 0) F0 = F;
            J = 2.0 * (hydhx + hxdhy) - sc * eu;
            y = F / J;
            u -= y;
            tot_its++;

            if (atol > PetscAbsReal(PetscRealPart(F)) || rtol * PetscAbsReal(PetscRealPart(F0)) > PetscAbsReal(PetscRealPart(F)) || stol * PetscAbsReal(PetscRealPart(u)) > PetscAbsReal(PetscRealPart(y))) break;
          }
          x[j][i] = u;
        }
      }
    }
    /*
     Restore vector
     */
    PetscCall(DMDAVecRestoreArray(da, localX, &x));
    PetscCall(DMLocalToGlobalBegin(da, localX, INSERT_VALUES, X));
    PetscCall(DMLocalToGlobalEnd(da, localX, INSERT_VALUES, X));
  }
  PetscCall(PetscLogFlops(tot_its * (21.0)));
  PetscCall(DMRestoreLocalVector(da, &localX));
  if (B) {
    PetscCall(DMDAVecRestoreArray(da, localB, &b));
    PetscCall(DMRestoreLocalVector(da, &localB));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

int main(int argc, char **argv)
{
  SNES      snes; /* nonlinear solver */
  Vec       x;    /* solution vector */
  AppCtx    user; /* user-defined work context */
  PetscInt  its;  /* iterations for convergence */
  PetscReal bratu_lambda_max = 6.81;
  PetscReal bratu_lambda_min = 0.;
  PetscInt  MMS              = 1;
  PetscBool flg, setMMS;
  DM        da;
  Vec       r = NULL;
  KSP       ksp;
  PetscInt  lits, slits;
  PetscBool useKokkos = PETSC_FALSE;

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Initialize program
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &argv, NULL, help));

  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_kokkos", &useKokkos, NULL));

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Initialize problem parameters
  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  user.ncoo  = 0;
  user.param = 6.0;
  PetscCall(PetscOptionsGetReal(NULL, NULL, "-par", &user.param, NULL));
  PetscCheck(user.param <= bratu_lambda_max && user.param >= bratu_lambda_min, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Lambda, %g, is out of range, [%g, %g]", (double)user.param, (double)bratu_lambda_min, (double)bratu_lambda_max);
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-mms", &MMS, &setMMS));
  if (MMS == 3) {
    PetscInt mPar = 2, nPar = 1;
    PetscCall(PetscOptionsGetInt(NULL, NULL, "-m_par", &mPar, NULL));
    PetscCall(PetscOptionsGetInt(NULL, NULL, "-n_par", &nPar, NULL));
    user.m = PetscPowInt(2, mPar);
    user.n = PetscPowInt(2, nPar);
  }

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Create nonlinear solver context
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes));
  PetscCall(SNESSetCountersReset(snes, PETSC_FALSE));
  PetscCall(SNESSetNGS(snes, NonlinearGS, NULL));

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Create distributed array (DMDA) to manage parallel grid and vectors
  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DMDA_STENCIL_STAR, 4, 4, PETSC_DECIDE, PETSC_DECIDE, 1, 1, NULL, NULL, &da));
  if (useKokkos) {
    PetscCall(DMSetVecType(da, VECKOKKOS));
    PetscCall(DMSetMatType(da, MATAIJKOKKOS));
  }
  PetscCall(DMSetFromOptions(da));
  PetscCall(DMSetUp(da));
  PetscCall(DMDASetUniformCoordinates(da, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0));
  PetscCall(DMSetApplicationContext(da, &user));
  PetscCall(SNESSetDM(snes, da));
  /*  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Extract global vectors from DMDA; then duplicate for remaining
     vectors that are the same types
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(DMCreateGlobalVector(da, &x));

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Set local function evaluation routine
  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  switch (MMS) {
  case 0:
    user.mms_solution = ZeroBCSolution;
    user.mms_forcing  = NULL;
    break;
  case 1:
    user.mms_solution = MMSSolution1;
    user.mms_forcing  = MMSForcing1;
    break;
  case 2:
    user.mms_solution = MMSSolution2;
    user.mms_forcing  = MMSForcing2;
    break;
  case 3:
    user.mms_solution = MMSSolution3;
    user.mms_forcing  = MMSForcing3;
    break;
  case 4:
    user.mms_solution = MMSSolution4;
    user.mms_forcing  = MMSForcing4;
    break;
  default:
    SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER, "Unknown MMS type %" PetscInt_FMT, MMS);
  }

  if (useKokkos) {
    PetscCheck(MMS == 1, PETSC_COMM_WORLD, PETSC_ERR_USER, "FormFunctionLocalVec_Kokkos only works with MMS 1");
    PetscCall(DMDASNESSetFunctionLocalVec(da, INSERT_VALUES, (DMDASNESFunctionVecFn *)FormFunctionLocalVec, &user));
  } else {
    PetscCall(DMDASNESSetFunctionLocal(da, INSERT_VALUES, (DMDASNESFunctionFn *)FormFunctionLocal, &user));
  }

  flg = PETSC_FALSE;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-fd", &flg, NULL));
  if (!flg) {
    if (useKokkos) PetscCall(DMDASNESSetJacobianLocalVec(da, (DMDASNESJacobianVecFn *)FormJacobianLocalVec, &user));
    else PetscCall(DMDASNESSetJacobianLocal(da, (DMDASNESJacobianFn *)FormJacobianLocal, &user));
  }

  flg = PETSC_FALSE;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-obj", &flg, NULL));
  if (flg) {
    if (useKokkos) PetscCall(DMDASNESSetObjectiveLocalVec(da, (DMDASNESObjectiveVecFn *)FormObjectiveLocalVec, &user));
    else PetscCall(DMDASNESSetObjectiveLocal(da, (DMDASNESObjectiveFn *)FormObjectiveLocal, &user));
  }

  if (PetscDefined(HAVE_MATLAB)) {
    PetscBool matlab_function = PETSC_FALSE;
    PetscCall(PetscOptionsGetBool(NULL, NULL, "-matlab_function", &matlab_function, 0));
    if (matlab_function) {
      PetscCall(VecDuplicate(x, &r));
      PetscCall(SNESSetFunction(snes, r, FormFunctionMatlab, &user));
    }
  }

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Customize nonlinear solver; set runtime options
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(SNESSetFromOptions(snes));

  PetscCall(FormInitialGuess(da, &user, x));

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Solve nonlinear system
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(SNESSolve(snes, NULL, x));
  PetscCall(SNESGetIterationNumber(snes, &its));

  PetscCall(SNESGetLinearSolveIterations(snes, &slits));
  PetscCall(SNESGetKSP(snes, &ksp));
  PetscCall(KSPGetTotalIterations(ksp, &lits));
  PetscCheck(lits == slits, PETSC_COMM_WORLD, PETSC_ERR_PLIB, "Number of total linear iterations reported by SNES %" PetscInt_FMT " does not match reported by KSP %" PetscInt_FMT, slits, lits);
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     If using MMS, check the l_2 error
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  if (setMMS) {
    Vec       e;
    PetscReal errorl2, errorinf;
    PetscInt  N;

    PetscCall(VecDuplicate(x, &e));
    PetscCall(PetscObjectViewFromOptions((PetscObject)x, NULL, "-sol_view"));
    PetscCall(FormExactSolution(da, &user, e));
    PetscCall(PetscObjectViewFromOptions((PetscObject)e, NULL, "-exact_view"));
    PetscCall(VecAXPY(e, -1.0, x));
    PetscCall(PetscObjectViewFromOptions((PetscObject)e, NULL, "-error_view"));
    PetscCall(VecNorm(e, NORM_2, &errorl2));
    PetscCall(VecNorm(e, NORM_INFINITY, &errorinf));
    PetscCall(VecGetSize(e, &N));
    PetscCall(PetscPrintf(PETSC_COMM_WORLD, "N: %" PetscInt_FMT " error L2 %g inf %g\n", N, (double)(errorl2 / PetscSqrtReal((PetscReal)N)), (double)errorinf));
    PetscCall(VecDestroy(&e));
    PetscCall(PetscLogEventSetDof(SNES_Solve, 0, N));
    PetscCall(PetscLogEventSetError(SNES_Solve, 0, errorl2 / PetscSqrtReal(N)));
  }

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Free work space.  All PETSc objects should be destroyed when they
     are no longer needed.
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(VecDestroy(&r));
  PetscCall(VecDestroy(&x));
  PetscCall(SNESDestroy(&snes));
  PetscCall(DMDestroy(&da));
  PetscCall(PetscFinalize());
  return 0;
}

/*TEST
  build:
    requires: !windows_compilers
    depends: ex55k.kokkos.cxx

  testset:
    output_file: output/ex55_asm_0.out
    requires: !single
    args: -mms 1 -par 0.0 -snes_monitor_short -snes_converged_reason -ksp_rtol 1.0e-9 -ksp_monitor_short -ksp_type richardson -pc_type asm -pc_asm_blocks 2 -pc_asm_overlap 0 -pc_asm_local_type additive -sub_pc_type lu
    filter: grep -v "type"

    test:
      suffix: asm_0
      args: -fd {{0 1}}

    test:
      requires: kokkos_kernels
      suffix: asm_0_kok
      args: -use_kokkos -fd {{0 1}}

  testset:
    output_file: output/ex55_1.out
    requires: !single
    args: -snes_monitor
    filter: grep -v "type"

    test:
      suffix: 1
      args: -fd {{0 1}}

    test:
      requires: kokkos_kernels
      suffix: 1_kok
      args: -use_kokkos -fd {{0 1}}

    test:
      requires: h2opus
      suffix: 1_h2opus
      args: -pc_type h2opus -fd {{0 1}}

    test:
      requires: h2opus kokkos_kernels
      suffix: 1_h2opus_k
      args: -use_kokkos -pc_type h2opus -fd {{0 1}}

TEST*/
