static char help[] = "Surface processes in geophysics.\n\n";

#include <petscsnes.h>
#include <petscdm.h>
#include <petscdmda.h>

/*
   User-defined application context - contains data needed by the
   application-provided call-back routines, FormJacobianLocal() and
   FormFunctionLocal().
*/
typedef struct {
  PetscReal D; /* The diffusion coefficient */
  PetscReal K; /* The advection coefficient */
  PetscInt  m; /* Exponent for A */
} AppCtx;

/*
   User-defined routines
*/
extern PetscErrorCode FormFunctionLocal(DMDALocalInfo *, PetscScalar **, PetscScalar **, AppCtx *);
extern PetscErrorCode FormJacobianLocal(DMDALocalInfo *, PetscScalar **, Mat, AppCtx *);

int main(int argc, char **argv)
{
  SNES     snes; /* nonlinear solver */
  AppCtx   user; /* user-defined work context */
  PetscInt its;  /* iterations for convergence */
  DM       da;

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Initialize program
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &argv, NULL, help));

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Initialize problem parameters
  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscOptionsBegin(PETSC_COMM_WORLD, "", "Surface Process Problem Options", "SNES");
  user.D = 1.0;
  PetscCall(PetscOptionsReal("-D", "The diffusion coefficient D", __FILE__, user.D, &user.D, NULL));
  user.K = 1.0;
  PetscCall(PetscOptionsReal("-K", "The advection coefficient K", __FILE__, user.K, &user.K, NULL));
  user.m = 1;
  PetscCall(PetscOptionsInt("-m", "The exponent for A", __FILE__, user.m, &user.m, NULL));
  PetscOptionsEnd();

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Create distributed array (DMDA) to manage parallel grid and vectors
  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DMDA_STENCIL_STAR, 4, 4, PETSC_DECIDE, PETSC_DECIDE, 1, 1, NULL, NULL, &da));
  PetscCall(DMSetFromOptions(da));
  PetscCall(DMSetUp(da));
  PetscCall(DMDASetUniformCoordinates(da, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0));
  PetscCall(DMSetApplicationContext(da, &user));
  PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes));
  PetscCall(SNESSetDM(snes, da));

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Set local function evaluation routine
  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(DMDASNESSetFunctionLocal(da, INSERT_VALUES, (PetscErrorCode (*)(DMDALocalInfo *, void *, void *, void *))FormFunctionLocal, &user));

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Customize solver; set runtime options
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(SNESSetFromOptions(snes));

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Solve nonlinear system
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(SNESSolve(snes, 0, 0));
  PetscCall(SNESGetIterationNumber(snes, &its));

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Number of SNES iterations = %" PetscInt_FMT "\n", its));

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Free work space.  All PETSc objects should be destroyed when they
     are no longer needed.
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  PetscCall(SNESDestroy(&snes));
  PetscCall(DMDestroy(&da));

  PetscCall(PetscFinalize());
  return 0;
}

PetscScalar funcU(DMDACoor2d *coords)
{
  return coords->x + coords->y;
}

PetscScalar funcA(PetscScalar z, AppCtx *user)
{
  PetscScalar v = 1.0;
  PetscInt    i;

  for (i = 0; i < user->m; ++i) v *= z;
  return v;
}

PetscScalar funcADer(PetscScalar z, AppCtx *user)
{
  PetscScalar v = 1.0;
  PetscInt    i;

  for (i = 0; i < user->m - 1; ++i) v *= z;
  return (PetscScalar)user->m * v;
}

/*
   FormFunctionLocal - Evaluates nonlinear function, F(x).
*/
PetscErrorCode FormFunctionLocal(DMDALocalInfo *info, PetscScalar **x, PetscScalar **f, AppCtx *user)
{
  DM           coordDA;
  Vec          coordinates;
  DMDACoor2d **coords;
  PetscScalar  u, ux, uy, uxx, uyy;
  PetscReal    D, K, hx, hy, hxdhy, hydhx;
  PetscInt     i, j;

  PetscFunctionBeginUser;
  D     = user->D;
  K     = user->K;
  hx    = 1.0 / (PetscReal)(info->mx - 1);
  hy    = 1.0 / (PetscReal)(info->my - 1);
  hxdhy = hx / hy;
  hydhx = hy / hx;
  /*
     Compute function over the locally owned part of the grid
  */
  PetscCall(DMGetCoordinateDM(info->da, &coordDA));
  PetscCall(DMGetCoordinates(info->da, &coordinates));
  PetscCall(DMDAVecGetArray(coordDA, coordinates, &coords));
  for (j = info->ys; j < info->ys + info->ym; j++) {
    for (i = info->xs; i < info->xs + info->xm; i++) {
      if (i == 0 || j == 0 || i == info->mx - 1 || j == info->my - 1) f[j][i] = x[j][i];
      else {
        u       = x[j][i];
        ux      = (x[j][i + 1] - x[j][i]) / hx;
        uy      = (x[j + 1][i] - x[j][i]) / hy;
        uxx     = (2.0 * u - x[j][i - 1] - x[j][i + 1]) * hydhx;
        uyy     = (2.0 * u - x[j - 1][i] - x[j + 1][i]) * hxdhy;
        f[j][i] = D * (uxx + uyy) - (K * funcA(x[j][i], user) * PetscSqrtScalar(ux * ux + uy * uy) + funcU(&coords[j][i])) * hx * hy;
        PetscCheck(!PetscIsInfOrNanScalar(f[j][i]), PETSC_COMM_SELF, PETSC_ERR_FP, "Invalid residual: %g", (double)PetscRealPart(f[j][i]));
      }
    }
  }
  PetscCall(DMDAVecRestoreArray(coordDA, coordinates, &coords));
  PetscCall(PetscLogFlops(11.0 * info->ym * info->xm));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   FormJacobianLocal - Evaluates Jacobian matrix.
*/
PetscErrorCode FormJacobianLocal(DMDALocalInfo *info, PetscScalar **x, Mat jac, AppCtx *user)
{
  MatStencil  col[5], row;
  PetscScalar D, K, A, v[5], hx, hy, hxdhy, hydhx, ux, uy;
  PetscReal   normGradZ;
  PetscInt    i, j, k;

  PetscFunctionBeginUser;
  D     = user->D;
  K     = user->K;
  hx    = 1.0 / (PetscReal)(info->mx - 1);
  hy    = 1.0 / (PetscReal)(info->my - 1);
  hxdhy = hx / hy;
  hydhx = hy / hx;

  /*
     Compute entries for the locally owned part of the Jacobian.
      - Currently, all PETSc parallel matrix formats are partitioned by
        contiguous chunks of rows across the processors.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local elements will be sent to the
        appropriate processor during matrix assembly).
      - Here, we set all entries for a particular row at once.
      - We can set matrix entries either using either
        MatSetValuesLocal() or MatSetValues(), as discussed above.
  */
  for (j = info->ys; j < info->ys + info->ym; j++) {
    for (i = info->xs; i < info->xs + info->xm; i++) {
      row.j = j;
      row.i = i;
      if (i == 0 || j == 0 || i == info->mx - 1 || j == info->my - 1) {
        /* boundary points */
        v[0] = 1.0;
        PetscCall(MatSetValuesStencil(jac, 1, &row, 1, &row, v, INSERT_VALUES));
      } else {
        /* interior grid points */
        ux        = (x[j][i + 1] - x[j][i]) / hx;
        uy        = (x[j + 1][i] - x[j][i]) / hy;
        normGradZ = PetscRealPart(PetscSqrtScalar(ux * ux + uy * uy));
        if (normGradZ < 1.0e-8) normGradZ = 1.0e-8;
        A = funcA(x[j][i], user);

        v[0]     = -D * hxdhy;
        col[0].j = j - 1;
        col[0].i = i;
        v[1]     = -D * hydhx;
        col[1].j = j;
        col[1].i = i - 1;
        v[2]     = D * 2.0 * (hydhx + hxdhy) + K * (funcADer(x[j][i], user) * normGradZ - A / normGradZ) * hx * hy;
        col[2].j = row.j;
        col[2].i = row.i;
        v[3]     = -D * hydhx + K * A * hx * hy / (2.0 * normGradZ);
        col[3].j = j;
        col[3].i = i + 1;
        v[4]     = -D * hxdhy + K * A * hx * hy / (2.0 * normGradZ);
        col[4].j = j + 1;
        col[4].i = i;
        for (k = 0; k < 5; ++k) PetscCheck(!PetscIsInfOrNanScalar(v[k]), PETSC_COMM_SELF, PETSC_ERR_FP, "Invalid residual: %g", (double)PetscRealPart(v[k]));
        PetscCall(MatSetValuesStencil(jac, 1, &row, 5, col, v, INSERT_VALUES));
      }
    }
  }

  /*
     Assemble matrix, using the 2-step process:
       MatAssemblyBegin(), MatAssemblyEnd().
  */
  PetscCall(MatAssemblyBegin(jac, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(jac, MAT_FINAL_ASSEMBLY));
  /*
     Tell the matrix we will never add a new nonzero location to the
     matrix. If we do, it will generate an error.
  */
  PetscCall(MatSetOption(jac, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*TEST

   test:
      args: -snes_view -snes_monitor_short -da_refine 1 -pc_type mg -ksp_type fgmres -pc_mg_type full -mg_levels_ksp_chebyshev_esteig 0.5,1.1

   test:
      suffix: ew_1
      args: -snes_monitor_short -ksp_converged_reason -da_grid_x 20 -da_grid_y 20 -snes_ksp_ew -snes_ksp_ew_version 1
      requires: !single

   test:
      suffix: ew_2
      args: -snes_monitor_short -ksp_converged_reason -da_grid_x 20 -da_grid_y 20 -snes_ksp_ew -snes_ksp_ew_version 2

   test:
      suffix: ew_3
      args: -snes_monitor_short -ksp_converged_reason -da_grid_x 20 -da_grid_y 20 -snes_ksp_ew -snes_ksp_ew_version 3
      requires: !single

   test:
      suffix: fm_rise_2
      args: -K 3 -m 1 -D 0.2 -snes_monitor_short -snes_type ngmres -snes_npc_side right -npc_snes_type newtonls -npc_snes_linesearch_type basic -snes_ngmres_restart_it 1 -snes_ngmres_restart_fm_rise
      requires: !single

   test:
      suffix: fm_rise_4
      args: -K 3 -m 1 -D 0.2 -snes_monitor_short -snes_type ngmres -snes_npc_side right -npc_snes_type newtonls -npc_snes_linesearch_type basic -snes_ngmres_restart_it 2 -snes_ngmres_restart_fm_rise -snes_rtol 1.e-2 -snes_max_it 5

TEST*/
