static char help[] = "Solves one dimensional Burger's equation compares with exact solution\n\n";

/*
    Not yet tested in parallel
*/

/* ------------------------------------------------------------------------

   This program uses the one-dimensional Burger's equation
       u_t = mu*u_xx - u u_x,
   on the domain 0 <= x <= 1, with periodic boundary conditions

   The operators are discretized with the spectral element method

   See the paper PDE-CONSTRAINED OPTIMIZATION WITH SPECTRAL ELEMENTS USING PETSC AND TAO
   by OANA MARIN, EMIL CONSTANTINESCU, AND BARRY SMITH for details on the exact solution
   used

   See src/tao/unconstrained/tutorials/burgers_spectral.c

  ------------------------------------------------------------------------- */

#include <petscts.h>
#include <petscdt.h>
#include <petscdraw.h>
#include <petscdmda.h>

/*
   User-defined application context - contains data needed by the
   application-provided call-back routines.
*/

typedef struct {
  PetscInt   n;       /* number of nodes */
  PetscReal *nodes;   /* GLL nodes */
  PetscReal *weights; /* GLL weights */
} PetscGLL;

typedef struct {
  PetscInt  N;               /* grid points per elements*/
  PetscInt  E;               /* number of elements */
  PetscReal tol_L2, tol_max; /* error norms */
  PetscInt  steps;           /* number of timesteps */
  PetscReal Tend;            /* endtime */
  PetscReal mu;              /* viscosity */
  PetscReal L;               /* total length of domain */
  PetscReal Le;
  PetscReal Tadj;
} PetscParam;

typedef struct {
  Vec grid; /* total grid */
  Vec curr_sol;
} PetscData;

typedef struct {
  Vec      grid;  /* total grid */
  Vec      mass;  /* mass matrix for total integration */
  Mat      stiff; /* stiffness matrix */
  Mat      keptstiff;
  Mat      grad;
  PetscGLL gll;
} PetscSEMOperators;

typedef struct {
  DM                da; /* distributed array data structure */
  PetscSEMOperators SEMop;
  PetscParam        param;
  PetscData         dat;
  TS                ts;
  PetscReal         initial_dt;
} AppCtx;

/*
   User-defined routines
*/
extern PetscErrorCode RHSMatrixLaplaciangllDM(TS, PetscReal, Vec, Mat, Mat, void *);
extern PetscErrorCode RHSMatrixAdvectiongllDM(TS, PetscReal, Vec, Mat, Mat, void *);
extern PetscErrorCode TrueSolution(TS, PetscReal, Vec, AppCtx *);
extern PetscErrorCode RHSFunction(TS, PetscReal, Vec, Vec, void *);
extern PetscErrorCode RHSJacobian(TS, PetscReal, Vec, Mat, Mat, void *);

int main(int argc, char **argv)
{
  AppCtx       appctx; /* user-defined application context */
  PetscInt     i, xs, xm, ind, j, lenglob;
  PetscReal    x, *wrk_ptr1, *wrk_ptr2;
  MatNullSpace nsp;
  PetscMPIInt  size;

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Initialize program and set problem parameters
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &argv, NULL, help));

  /*initialize parameters */
  appctx.param.N     = 10;   /* order of the spectral element */
  appctx.param.E     = 10;   /* number of elements */
  appctx.param.L     = 4.0;  /* length of the domain */
  appctx.param.mu    = 0.01; /* diffusion coefficient */
  appctx.initial_dt  = 5e-3;
  appctx.param.steps = PETSC_INT_MAX;
  appctx.param.Tend  = 4;

  PetscCall(PetscOptionsGetInt(NULL, NULL, "-N", &appctx.param.N, NULL));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-E", &appctx.param.E, NULL));
  PetscCall(PetscOptionsGetReal(NULL, NULL, "-Tend", &appctx.param.Tend, NULL));
  PetscCall(PetscOptionsGetReal(NULL, NULL, "-mu", &appctx.param.mu, NULL));
  appctx.param.Le = appctx.param.L / appctx.param.E;

  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  PetscCheck((appctx.param.E % size) == 0, PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "Number of elements must be divisible by number of processes");

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Create GLL data structures
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(PetscMalloc2(appctx.param.N, &appctx.SEMop.gll.nodes, appctx.param.N, &appctx.SEMop.gll.weights));
  PetscCall(PetscDTGaussLobattoLegendreQuadrature(appctx.param.N, PETSCGAUSSLOBATTOLEGENDRE_VIA_LINEAR_ALGEBRA, appctx.SEMop.gll.nodes, appctx.SEMop.gll.weights));
  appctx.SEMop.gll.n = appctx.param.N;
  lenglob            = appctx.param.E * (appctx.param.N - 1);

  /*
     Create distributed array (DMDA) to manage parallel grid and vectors
     and to set up the ghost point communication pattern.  There are E*(Nl-1)+1
     total grid values spread equally among all the processors, except first and last
  */

  PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_PERIODIC, lenglob, 1, 1, NULL, &appctx.da));
  PetscCall(DMSetFromOptions(appctx.da));
  PetscCall(DMSetUp(appctx.da));

  /*
     Extract global and local vectors from DMDA; we use these to store the
     approximate solution.  Then duplicate these for remaining vectors that
     have the same types.
  */

  PetscCall(DMCreateGlobalVector(appctx.da, &appctx.dat.curr_sol));
  PetscCall(VecDuplicate(appctx.dat.curr_sol, &appctx.SEMop.grid));
  PetscCall(VecDuplicate(appctx.dat.curr_sol, &appctx.SEMop.mass));

  PetscCall(DMDAGetCorners(appctx.da, &xs, NULL, NULL, &xm, NULL, NULL));
  PetscCall(DMDAVecGetArray(appctx.da, appctx.SEMop.grid, &wrk_ptr1));
  PetscCall(DMDAVecGetArray(appctx.da, appctx.SEMop.mass, &wrk_ptr2));

  /* Compute function over the locally owned part of the grid */

  xs = xs / (appctx.param.N - 1);
  xm = xm / (appctx.param.N - 1);

  /*
     Build total grid and mass over entire mesh (multi-elemental)
  */

  for (i = xs; i < xs + xm; i++) {
    for (j = 0; j < appctx.param.N - 1; j++) {
      x             = (appctx.param.Le / 2.0) * (appctx.SEMop.gll.nodes[j] + 1.0) + appctx.param.Le * i;
      ind           = i * (appctx.param.N - 1) + j;
      wrk_ptr1[ind] = x;
      wrk_ptr2[ind] = .5 * appctx.param.Le * appctx.SEMop.gll.weights[j];
      if (j == 0) wrk_ptr2[ind] += .5 * appctx.param.Le * appctx.SEMop.gll.weights[j];
    }
  }
  PetscCall(DMDAVecRestoreArray(appctx.da, appctx.SEMop.grid, &wrk_ptr1));
  PetscCall(DMDAVecRestoreArray(appctx.da, appctx.SEMop.mass, &wrk_ptr2));

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   Create matrix data structure; set matrix evaluation routine.
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(DMSetMatrixPreallocateOnly(appctx.da, PETSC_TRUE));
  PetscCall(DMCreateMatrix(appctx.da, &appctx.SEMop.stiff));
  PetscCall(DMCreateMatrix(appctx.da, &appctx.SEMop.grad));
  /*
   For linear problems with a time-dependent f(u,t) in the equation
   u_t = f(u,t), the user provides the discretized right-hand side
   as a time-dependent matrix.
   */
  PetscCall(RHSMatrixLaplaciangllDM(appctx.ts, 0.0, appctx.dat.curr_sol, appctx.SEMop.stiff, appctx.SEMop.stiff, &appctx));
  PetscCall(RHSMatrixAdvectiongllDM(appctx.ts, 0.0, appctx.dat.curr_sol, appctx.SEMop.grad, appctx.SEMop.grad, &appctx));
  /*
       For linear problems with a time-dependent f(u,t) in the equation
       u_t = f(u,t), the user provides the discretized right-hand side
       as a time-dependent matrix.
    */

  PetscCall(MatDuplicate(appctx.SEMop.stiff, MAT_COPY_VALUES, &appctx.SEMop.keptstiff));

  /* attach the null space to the matrix, this probably is not needed but does no harm */
  PetscCall(MatNullSpaceCreate(PETSC_COMM_WORLD, PETSC_TRUE, 0, NULL, &nsp));
  PetscCall(MatSetNullSpace(appctx.SEMop.stiff, nsp));
  PetscCall(MatSetNullSpace(appctx.SEMop.keptstiff, nsp));
  PetscCall(MatNullSpaceTest(nsp, appctx.SEMop.stiff, NULL));
  PetscCall(MatNullSpaceDestroy(&nsp));
  /* attach the null space to the matrix, this probably is not needed but does no harm */
  PetscCall(MatNullSpaceCreate(PETSC_COMM_WORLD, PETSC_TRUE, 0, NULL, &nsp));
  PetscCall(MatSetNullSpace(appctx.SEMop.grad, nsp));
  PetscCall(MatNullSpaceTest(nsp, appctx.SEMop.grad, NULL));
  PetscCall(MatNullSpaceDestroy(&nsp));

  /* Create the TS solver that solves the ODE and its adjoint; set its options */
  PetscCall(TSCreate(PETSC_COMM_WORLD, &appctx.ts));
  PetscCall(TSSetProblemType(appctx.ts, TS_NONLINEAR));
  PetscCall(TSSetType(appctx.ts, TSRK));
  PetscCall(TSSetDM(appctx.ts, appctx.da));
  PetscCall(TSSetTime(appctx.ts, 0.0));
  PetscCall(TSSetTimeStep(appctx.ts, appctx.initial_dt));
  PetscCall(TSSetMaxSteps(appctx.ts, appctx.param.steps));
  PetscCall(TSSetMaxTime(appctx.ts, appctx.param.Tend));
  PetscCall(TSSetExactFinalTime(appctx.ts, TS_EXACTFINALTIME_MATCHSTEP));
  PetscCall(TSSetTolerances(appctx.ts, 1e-7, NULL, 1e-7, NULL));
  PetscCall(TSSetSaveTrajectory(appctx.ts));
  PetscCall(TSSetFromOptions(appctx.ts));
  PetscCall(TSSetRHSFunction(appctx.ts, NULL, RHSFunction, &appctx));
  PetscCall(TSSetRHSJacobian(appctx.ts, appctx.SEMop.stiff, appctx.SEMop.stiff, RHSJacobian, &appctx));

  /* Set Initial conditions for the problem  */
  PetscCall(TrueSolution(appctx.ts, 0, appctx.dat.curr_sol, &appctx));

  PetscCall(TSSetSolutionFunction(appctx.ts, (PetscErrorCode (*)(TS, PetscReal, Vec, void *))TrueSolution, &appctx));
  PetscCall(TSSetTime(appctx.ts, 0.0));
  PetscCall(TSSetStepNumber(appctx.ts, 0));

  PetscCall(TSSolve(appctx.ts, appctx.dat.curr_sol));

  PetscCall(MatDestroy(&appctx.SEMop.stiff));
  PetscCall(MatDestroy(&appctx.SEMop.keptstiff));
  PetscCall(MatDestroy(&appctx.SEMop.grad));
  PetscCall(VecDestroy(&appctx.SEMop.grid));
  PetscCall(VecDestroy(&appctx.SEMop.mass));
  PetscCall(VecDestroy(&appctx.dat.curr_sol));
  PetscCall(PetscFree2(appctx.SEMop.gll.nodes, appctx.SEMop.gll.weights));
  PetscCall(DMDestroy(&appctx.da));
  PetscCall(TSDestroy(&appctx.ts));

  /*
     Always call PetscFinalize() before exiting a program.  This routine
       - finalizes the PETSc libraries as well as MPI
       - provides summary and diagnostic information if certain runtime
         options are chosen (e.g., -log_view).
  */
  PetscCall(PetscFinalize());
  return 0;
}

/*
   TrueSolution() computes the true solution for the PDE

   Input Parameter:
   u - uninitialized solution vector (global)
   appctx - user-defined application context

   Output Parameter:
   u - vector with solution at initial time (global)
*/
PetscErrorCode TrueSolution(TS ts, PetscReal t, Vec u, AppCtx *appctx)
{
  PetscScalar       *s;
  const PetscScalar *xg;
  PetscInt           i, xs, xn;

  PetscFunctionBeginUser;
  PetscCall(DMDAVecGetArray(appctx->da, u, &s));
  PetscCall(DMDAVecGetArrayRead(appctx->da, appctx->SEMop.grid, (void *)&xg));
  PetscCall(DMDAGetCorners(appctx->da, &xs, NULL, NULL, &xn, NULL, NULL));
  for (i = xs; i < xs + xn; i++) {
    s[i] = 2.0 * appctx->param.mu * PETSC_PI * PetscSinScalar(PETSC_PI * xg[i]) * PetscExpReal(-appctx->param.mu * PETSC_PI * PETSC_PI * t) / (2.0 + PetscCosScalar(PETSC_PI * xg[i]) * PetscExpReal(-appctx->param.mu * PETSC_PI * PETSC_PI * t));
  }
  PetscCall(DMDAVecRestoreArray(appctx->da, u, &s));
  PetscCall(DMDAVecRestoreArrayRead(appctx->da, appctx->SEMop.grid, (void *)&xg));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode RHSFunction(TS ts, PetscReal t, Vec globalin, Vec globalout, PetscCtx ctx)
{
  AppCtx *appctx = (AppCtx *)ctx;

  PetscFunctionBeginUser;
  PetscCall(MatMult(appctx->SEMop.grad, globalin, globalout)); /* grad u */
  PetscCall(VecPointwiseMult(globalout, globalin, globalout)); /* u grad u */
  PetscCall(VecScale(globalout, -1.0));
  PetscCall(MatMultAdd(appctx->SEMop.keptstiff, globalin, globalout, globalout));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*

      K is the discretiziation of the Laplacian
      G is the discretization of the gradient

      Computes Jacobian of      K u + diag(u) G u   which is given by
              K   + diag(u)G + diag(Gu)
*/
PetscErrorCode RHSJacobian(TS ts, PetscReal t, Vec globalin, Mat A, Mat B, PetscCtx ctx)
{
  AppCtx *appctx = (AppCtx *)ctx;
  Vec     Gglobalin;

  PetscFunctionBeginUser;
  /*    A = diag(u) G */

  PetscCall(MatCopy(appctx->SEMop.grad, A, SAME_NONZERO_PATTERN));
  PetscCall(MatDiagonalScale(A, globalin, NULL));

  /*    A  = A + diag(Gu) */
  PetscCall(VecDuplicate(globalin, &Gglobalin));
  PetscCall(MatMult(appctx->SEMop.grad, globalin, Gglobalin));
  PetscCall(MatDiagonalSet(A, Gglobalin, ADD_VALUES));
  PetscCall(VecDestroy(&Gglobalin));

  /*   A  = K - A    */
  PetscCall(MatScale(A, -1.0));
  PetscCall(MatAXPY(A, 0.0, appctx->SEMop.keptstiff, SAME_NONZERO_PATTERN));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscblaslapack.h>
/*
     Matrix free operation of 1d Laplacian and Grad for GLL spectral elements
*/
PetscErrorCode MatMult_Laplacian(Mat A, Vec x, Vec y)
{
  AppCtx            *appctx;
  PetscReal        **temp, vv;
  PetscInt           i, j, xs, xn;
  Vec                xlocal, ylocal;
  const PetscScalar *xl;
  PetscScalar       *yl;
  PetscBLASInt       _One  = 1, n;
  PetscScalar        _DOne = 1;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(A, &appctx));
  PetscCall(DMGetLocalVector(appctx->da, &xlocal));
  PetscCall(DMGlobalToLocalBegin(appctx->da, x, INSERT_VALUES, xlocal));
  PetscCall(DMGlobalToLocalEnd(appctx->da, x, INSERT_VALUES, xlocal));
  PetscCall(DMGetLocalVector(appctx->da, &ylocal));
  PetscCall(VecSet(ylocal, 0.0));
  PetscCall(PetscGaussLobattoLegendreElementLaplacianCreate(appctx->SEMop.gll.n, appctx->SEMop.gll.nodes, appctx->SEMop.gll.weights, &temp));
  for (i = 0; i < appctx->param.N; i++) {
    vv = -appctx->param.mu * 2.0 / appctx->param.Le;
    for (j = 0; j < appctx->param.N; j++) temp[i][j] = temp[i][j] * vv;
  }
  PetscCall(DMDAVecGetArrayRead(appctx->da, xlocal, (void *)&xl));
  PetscCall(DMDAVecGetArray(appctx->da, ylocal, &yl));
  PetscCall(DMDAGetCorners(appctx->da, &xs, NULL, NULL, &xn, NULL, NULL));
  PetscCall(PetscBLASIntCast(appctx->param.N, &n));
  for (j = xs; j < xs + xn; j += appctx->param.N - 1) PetscCallBLAS("BLASgemv", BLASgemv_("N", &n, &n, &_DOne, &temp[0][0], &n, &xl[j], &_One, &_DOne, &yl[j], &_One));
  PetscCall(DMDAVecRestoreArrayRead(appctx->da, xlocal, (void *)&xl));
  PetscCall(DMDAVecRestoreArray(appctx->da, ylocal, &yl));
  PetscCall(PetscGaussLobattoLegendreElementLaplacianDestroy(appctx->SEMop.gll.n, appctx->SEMop.gll.nodes, appctx->SEMop.gll.weights, &temp));
  PetscCall(VecSet(y, 0.0));
  PetscCall(DMLocalToGlobalBegin(appctx->da, ylocal, ADD_VALUES, y));
  PetscCall(DMLocalToGlobalEnd(appctx->da, ylocal, ADD_VALUES, y));
  PetscCall(DMRestoreLocalVector(appctx->da, &xlocal));
  PetscCall(DMRestoreLocalVector(appctx->da, &ylocal));
  PetscCall(VecPointwiseDivide(y, y, appctx->SEMop.mass));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMult_Advection(Mat A, Vec x, Vec y)
{
  AppCtx            *appctx;
  PetscReal        **temp;
  PetscInt           j, xs, xn;
  Vec                xlocal, ylocal;
  const PetscScalar *xl;
  PetscScalar       *yl;
  PetscBLASInt       _One  = 1, n;
  PetscScalar        _DOne = 1;

  PetscFunctionBeginUser;
  PetscCall(MatShellGetContext(A, &appctx));
  PetscCall(DMGetLocalVector(appctx->da, &xlocal));
  PetscCall(DMGlobalToLocalBegin(appctx->da, x, INSERT_VALUES, xlocal));
  PetscCall(DMGlobalToLocalEnd(appctx->da, x, INSERT_VALUES, xlocal));
  PetscCall(DMGetLocalVector(appctx->da, &ylocal));
  PetscCall(VecSet(ylocal, 0.0));
  PetscCall(PetscGaussLobattoLegendreElementAdvectionCreate(appctx->SEMop.gll.n, appctx->SEMop.gll.nodes, appctx->SEMop.gll.weights, &temp));
  PetscCall(DMDAVecGetArrayRead(appctx->da, xlocal, (void *)&xl));
  PetscCall(DMDAVecGetArray(appctx->da, ylocal, &yl));
  PetscCall(DMDAGetCorners(appctx->da, &xs, NULL, NULL, &xn, NULL, NULL));
  PetscCall(PetscBLASIntCast(appctx->param.N, &n));
  for (j = xs; j < xs + xn; j += appctx->param.N - 1) PetscCallBLAS("BLASgemv", BLASgemv_("N", &n, &n, &_DOne, &temp[0][0], &n, &xl[j], &_One, &_DOne, &yl[j], &_One));
  PetscCall(DMDAVecRestoreArrayRead(appctx->da, xlocal, (void *)&xl));
  PetscCall(DMDAVecRestoreArray(appctx->da, ylocal, &yl));
  PetscCall(PetscGaussLobattoLegendreElementAdvectionDestroy(appctx->SEMop.gll.n, appctx->SEMop.gll.nodes, appctx->SEMop.gll.weights, &temp));
  PetscCall(VecSet(y, 0.0));
  PetscCall(DMLocalToGlobalBegin(appctx->da, ylocal, ADD_VALUES, y));
  PetscCall(DMLocalToGlobalEnd(appctx->da, ylocal, ADD_VALUES, y));
  PetscCall(DMRestoreLocalVector(appctx->da, &xlocal));
  PetscCall(DMRestoreLocalVector(appctx->da, &ylocal));
  PetscCall(VecPointwiseDivide(y, y, appctx->SEMop.mass));
  PetscCall(VecScale(y, -1.0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   RHSMatrixLaplacian - User-provided routine to compute the right-hand-side
   matrix for the Laplacian operator

   Input Parameters:
   ts - the TS context
   t - current time  (ignored)
   X - current solution (ignored)
   dummy - optional user-defined context, as set by TSetRHSJacobian()

   Output Parameters:
   AA - Jacobian matrix
   BB - optionally different matrix from which the preconditioner is built

*/
PetscErrorCode RHSMatrixLaplaciangllDM(TS ts, PetscReal t, Vec X, Mat A, Mat BB, PetscCtx ctx)
{
  PetscReal **temp;
  PetscReal   vv;
  AppCtx     *appctx = (AppCtx *)ctx; /* user-defined application context */
  PetscInt    i, xs, xn, l, j;
  PetscInt   *rowsDM;
  PetscBool   flg = PETSC_FALSE;

  PetscFunctionBeginUser;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-gll_mf", &flg, NULL));

  if (!flg) {
    /*
     Creates the element stiffness matrix for the given gll
     */
    PetscCall(PetscGaussLobattoLegendreElementLaplacianCreate(appctx->SEMop.gll.n, appctx->SEMop.gll.nodes, appctx->SEMop.gll.weights, &temp));
    /* workaround for clang analyzer warning: Division by zero */
    PetscCheck(appctx->param.N > 1, PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "Spectral element order should be > 1");

    /* scale by the size of the element */
    for (i = 0; i < appctx->param.N; i++) {
      vv = -appctx->param.mu * 2.0 / appctx->param.Le;
      for (j = 0; j < appctx->param.N; j++) temp[i][j] = temp[i][j] * vv;
    }

    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
    PetscCall(DMDAGetCorners(appctx->da, &xs, NULL, NULL, &xn, NULL, NULL));

    xs = xs / (appctx->param.N - 1);
    xn = xn / (appctx->param.N - 1);

    PetscCall(PetscMalloc1(appctx->param.N, &rowsDM));
    /*
     loop over local elements
     */
    for (j = xs; j < xs + xn; j++) {
      for (l = 0; l < appctx->param.N; l++) rowsDM[l] = 1 + (j - xs) * (appctx->param.N - 1) + l;
      PetscCall(MatSetValuesLocal(A, appctx->param.N, rowsDM, appctx->param.N, rowsDM, &temp[0][0], ADD_VALUES));
    }
    PetscCall(PetscFree(rowsDM));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(VecReciprocal(appctx->SEMop.mass));
    PetscCall(MatDiagonalScale(A, appctx->SEMop.mass, 0));
    PetscCall(VecReciprocal(appctx->SEMop.mass));

    PetscCall(PetscGaussLobattoLegendreElementLaplacianDestroy(appctx->SEMop.gll.n, appctx->SEMop.gll.nodes, appctx->SEMop.gll.weights, &temp));
  } else {
    PetscCall(MatSetType(A, MATSHELL));
    PetscCall(MatSetUp(A));
    PetscCall(MatShellSetContext(A, appctx));
    PetscCall(MatShellSetOperation(A, MATOP_MULT, (PetscErrorCodeFn *)MatMult_Laplacian));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   RHSMatrixAdvection - User-provided routine to compute the right-hand-side
   matrix for the Advection (gradient) operator.

   Input Parameters:
   ts - the TS context
   t - current time
   global_in - global input vector
   dummy - optional user-defined context, as set by TSetRHSJacobian()

   Output Parameters:
   AA - Jacobian matrix
   BB - optionally different matrix used to construct the preconditioner

*/
PetscErrorCode RHSMatrixAdvectiongllDM(TS ts, PetscReal t, Vec X, Mat A, Mat BB, PetscCtx ctx)
{
  PetscReal **temp;
  AppCtx     *appctx = (AppCtx *)ctx; /* user-defined application context */
  PetscInt    xs, xn, l, j;
  PetscInt   *rowsDM;
  PetscBool   flg = PETSC_FALSE;

  PetscFunctionBeginUser;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-gll_mf", &flg, NULL));

  if (!flg) {
    /*
     Creates the advection matrix for the given gll
     */
    PetscCall(PetscGaussLobattoLegendreElementAdvectionCreate(appctx->SEMop.gll.n, appctx->SEMop.gll.nodes, appctx->SEMop.gll.weights, &temp));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
    PetscCall(DMDAGetCorners(appctx->da, &xs, NULL, NULL, &xn, NULL, NULL));
    xs = xs / (appctx->param.N - 1);
    xn = xn / (appctx->param.N - 1);

    PetscCall(PetscMalloc1(appctx->param.N, &rowsDM));
    for (j = xs; j < xs + xn; j++) {
      for (l = 0; l < appctx->param.N; l++) rowsDM[l] = 1 + (j - xs) * (appctx->param.N - 1) + l;
      PetscCall(MatSetValuesLocal(A, appctx->param.N, rowsDM, appctx->param.N, rowsDM, &temp[0][0], ADD_VALUES));
    }
    PetscCall(PetscFree(rowsDM));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

    PetscCall(VecReciprocal(appctx->SEMop.mass));
    PetscCall(MatDiagonalScale(A, appctx->SEMop.mass, 0));
    PetscCall(VecReciprocal(appctx->SEMop.mass));

    PetscCall(PetscGaussLobattoLegendreElementAdvectionDestroy(appctx->SEMop.gll.n, appctx->SEMop.gll.nodes, appctx->SEMop.gll.weights, &temp));
  } else {
    PetscCall(MatSetType(A, MATSHELL));
    PetscCall(MatSetUp(A));
    PetscCall(MatShellSetContext(A, appctx));
    PetscCall(MatShellSetOperation(A, MATOP_MULT, (PetscErrorCodeFn *)MatMult_Advection));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*TEST

    build:
      requires: !complex

    test:
      suffix: 1
      requires: !single
      output_file: output/empty.out

    test:
      suffix: 2
      nsize: 5
      requires: !single
      output_file: output/empty.out

    test:
      suffix: 3
      requires: !single
      args: -ts_view -ts_type beuler -gll_mf -pc_type none -ts_max_steps 5 -ts_monitor_error

    test:
      suffix: 4
      requires: !single
      args: -ts_view -ts_type beuler -pc_type none -ts_max_steps 5 -ts_monitor_error

TEST*/
