xref: /libCEED/examples/fluids/navierstokes.c (revision 4438636ff73623f226c6a4a41962ea70516f8496)
1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3 // reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 //                        libCEED + PETSc Example: Navier-Stokes
18 //
19 // This example demonstrates a simple usage of libCEED with PETSc to solve a
20 // Navier-Stokes problem.
21 //
22 // The code is intentionally "raw", using only low-level communication
23 // primitives.
24 //
25 // Build with:
26 //
27 //     make [PETSC_DIR=</path/to/petsc>] [CEED_DIR=</path/to/libceed>] navierstokes
28 //
29 // Sample runs:
30 //
31 //     ./navierstokes -ceed /cpu/self -problem density_current -degree 1
32 //     ./navierstokes -ceed /gpu/cuda -problem advection -degree 1
33 //
34 //TESTARGS(name="dc_explicit") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 1,1,2 -units_kilogram 1e-9 -lx 125 -ly 125 -lz 250 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. -ts_dt 1e-3 -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-explicit.bin
35 //TESTARGS(name="dc_implicit_stab_none") -ceed {ceed_resource} -test -degree 3 -dm_plex_box_faces 1,1,2 -units_kilogram 1e-9 -lx 125 -ly 125 -lz 250 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-dc-implicit-stab-none.bin
36 //TESTARGS(name="adv_rotation_explicit_strong") -ceed {ceed_resource} -test -problem advection -strong_form 1 -degree 3 -dm_plex_box_faces 1,1,2 -units_kilogram 1e-9 -lx 125 -ly 125 -lz 250 -center 62.5,62.5,187.5 -rc 100. -ts_dt 1e-3 -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-rotation-explicit-strong.bin
37 //TESTARGS(name="adv_rotation_implicit_stab_supg") -ceed {ceed_resource} -test -problem advection -CtauS .3 -stab supg -degree 3 -dm_plex_box_faces 1,1,2 -units_kilogram 1e-9 -lx 125 -ly 125 -lz 250 -center 62.5,62.5,187.5 -rc 100. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-rotation-implicit-stab-supg.bin
38 //TESTARGS(name="adv_translation_implicit_stab_su") -ceed {ceed_resource} -test -problem advection -CtauS .3 -stab su -degree 3 -dm_plex_box_faces 1,1,2 -units_kilogram 1e-9 -lx 125 -ly 125 -lz 250 -center 62.5,62.5,187.5 -rc 100. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -problem_advection_wind translation -problem_advection_wind_translation .53,-1.33,-2.65 -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv-translation-implicit-stab-su.bin
39 //TESTARGS(name="adv2d_rotation_explicit_strong") -ceed {ceed_resource} -test -problem advection2d -strong_form 1 -degree 3 -dm_plex_box_faces 1,1,2 -units_kilogram 1e-9 -lx 125 -ly 125 -lz 250 -center 62.5,62.5,187.5 -rc 100. -ts_dt 1e-3 -compare_final_state_atol 1E-11 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv2d-rotation-explicit-strong.bin
40 //TESTARGS(name="adv2d_rotation_implicit_stab_supg") -ceed {ceed_resource} -test -problem advection2d -CtauS .3 -stab supg -degree 3 -dm_plex_box_faces 1,1,2 -units_kilogram 1e-9 -lx 125 -ly 125 -lz 250 -center 62.5,62.5,187.5 -rc 100. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv2d-rotation-implicit-stab-supg.bin
41 //TESTARGS(name="adv2d_translation_implicit_stab_su") -ceed {ceed_resource} -test -problem advection2d -CtauS .3 -stab su -degree 3 -dm_plex_box_faces 1,1,2 -units_kilogram 1e-9 -lx 125 -ly 125 -lz 250 -center 62.5,62.5,187.5 -rc 100. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -problem_advection_wind translation -problem_advection_wind_translation .53,-1.33,0 -compare_final_state_atol 5E-4 -compare_final_state_filename examples/fluids/tests-output/fluids-navierstokes-adv2d-translation-implicit-stab-su.bin
42 
43 /// @file
44 /// Navier-Stokes example using PETSc
45 
46 const char help[] = "Solve Navier-Stokes using PETSc and libCEED\n";
47 
48 #include <ceed.h>
49 #include <petscdmplex.h>
50 #include <petscsys.h>
51 #include <petscts.h>
52 #include <stdbool.h>
53 #include "advection.h"
54 #include "advection2d.h"
55 #include "common.h"
56 #include "euler-vortex.h"
57 #include "densitycurrent.h"
58 #include "setup-boundary.h"
59 
60 #if PETSC_VERSION_LT(3,14,0)
61 #  define DMPlexGetClosureIndices(a,b,c,d,e,f,g,h,i) DMPlexGetClosureIndices(a,b,c,d,f,g,i)
62 #  define DMPlexRestoreClosureIndices(a,b,c,d,e,f,g,h,i) DMPlexRestoreClosureIndices(a,b,c,d,f,g,i)
63 #endif
64 
65 #if PETSC_VERSION_LT(3,14,0)
66 #  define DMAddBoundary(a,b,c,d,e,f,g,h,i,j,k,l) DMAddBoundary(a,b,c,d,e,f,g,h,j,k,l)
67 #endif
68 
69 // MemType Options
70 static const char *const memTypes[] = {
71   "host",
72   "device",
73   "memType", "CEED_MEM_", NULL
74 };
75 
76 // Problem Options
77 typedef enum {
78   NS_DENSITY_CURRENT = 0,
79   NS_ADVECTION = 1,
80   NS_ADVECTION2D = 2,
81   NS_EULER_VORTEX = 3,
82 } problemType;
83 static const char *const problemTypes[] = {
84   "density_current",
85   "advection",
86   "advection2d",
87   "euler_vortex"
88   "problemType", "NS_", NULL
89 };
90 
91 // Wind Options for Advection
92 typedef enum {
93   ADVECTION_WIND_ROTATION = 0,
94   ADVECTION_WIND_TRANSLATION = 1,
95 } WindType;
96 static const char *const WindTypes[] = {
97   "rotation",
98   "translation",
99   "WindType", "ADVECTION_WIND_", NULL
100 };
101 
102 typedef enum {
103   STAB_NONE = 0,
104   STAB_SU = 1,   // Streamline Upwind
105   STAB_SUPG = 2, // Streamline Upwind Petrov-Galerkin
106 } StabilizationType;
107 static const char *const StabilizationTypes[] = {
108   "none",
109   "SU",
110   "SUPG",
111   "StabilizationType", "STAB_", NULL
112 };
113 
114 // Problem specific data
115 typedef struct {
116   CeedInt dim, qdatasizeVol, qdatasizeSur;
117   CeedQFunctionUser setupVol, setupSur, ics, applyVol_rhs, applyVol_ifunction,
118                     applySur, applyIn, applyOut;
119   PetscErrorCode (*bc)(PetscInt, PetscReal, const PetscReal[], PetscInt,
120                        PetscScalar[], void *);
121   const char *setupVol_loc, *setupSur_loc, *ics_loc, *applyVol_rhs_loc,
122         *applyVol_ifunction_loc, *applySur_loc, *applyIn_loc, *applyOut_loc;
123   const bool non_zero_time;
124 } problemData;
125 
126 problemData problemOptions[] = {
127   [NS_DENSITY_CURRENT] = {
128     .dim                       = 3,
129     .qdatasizeVol              = 10,
130     .qdatasizeSur              = 4,
131     .setupVol                  = Setup,
132     .setupVol_loc              = Setup_loc,
133     .setupSur                  = SetupBoundary,
134     .setupSur_loc              = SetupBoundary_loc,
135     .ics                       = ICsDC,
136     .ics_loc                   = ICsDC_loc,
137     .applyVol_rhs              = DC,
138     .applyVol_rhs_loc          = DC_loc,
139     .applyVol_ifunction        = IFunction_DC,
140     .applyVol_ifunction_loc    = IFunction_DC_loc,
141     .bc                        = Exact_DC,
142     .non_zero_time             = PETSC_FALSE,
143   },
144   [NS_ADVECTION] = {
145     .dim                       = 3,
146     .qdatasizeVol              = 10,
147     .qdatasizeSur              = 4,
148     .setupVol                  = Setup,
149     .setupVol_loc              = Setup_loc,
150     .setupSur                  = SetupBoundary,
151     .setupSur_loc              = SetupBoundary_loc,
152     .ics                       = ICsAdvection,
153     .ics_loc                   = ICsAdvection_loc,
154     .applyVol_rhs              = Advection,
155     .applyVol_rhs_loc          = Advection_loc,
156     .applyVol_ifunction        = IFunction_Advection,
157     .applyVol_ifunction_loc    = IFunction_Advection_loc,
158     .applySur                  = Advection_Sur,
159     .applySur_loc              = Advection_Sur_loc,
160     .bc                        = Exact_Advection,
161     .non_zero_time             = PETSC_FALSE,
162   },
163   [NS_ADVECTION2D] = {
164     .dim                       = 2,
165     .qdatasizeVol              = 5,
166     .qdatasizeSur              = 3,
167     .setupVol                  = Setup2d,
168     .setupVol_loc              = Setup2d_loc,
169     .setupSur                  = SetupBoundary2d,
170     .setupSur_loc              = SetupBoundary2d_loc,
171     .ics                       = ICsAdvection2d,
172     .ics_loc                   = ICsAdvection2d_loc,
173     .applyVol_rhs              = Advection2d,
174     .applyVol_rhs_loc          = Advection2d_loc,
175     .applyVol_ifunction        = IFunction_Advection2d,
176     .applyVol_ifunction_loc    = IFunction_Advection2d_loc,
177     .applySur                  = Advection2d_Sur,
178     .applySur_loc              = Advection2d_Sur_loc,
179     .bc                        = Exact_Advection2d,
180     .non_zero_time             = PETSC_TRUE,
181   },
182   [NS_EULER_VORTEX] = {
183     .dim                       = 3,
184     .qdatasizeVol              = 10,
185     .qdatasizeSur              = 4,
186     .setupVol                  = Setup,
187     .setupVol_loc              = Setup_loc,
188     .setupSur                  = SetupBoundary,
189     .setupSur_loc              = SetupBoundary_loc,
190     .ics                       = ICsEuler,
191     .ics_loc                   = ICsEuler_loc,
192     .applyVol_rhs              = Euler,
193     .applyVol_rhs_loc          = Euler_loc,
194     .applyIn                   = Euler_In,
195     .applyIn_loc               = Euler_In_loc,
196     .applyOut                  = Euler_Out,
197     .applyOut_loc              = Euler_Out_loc,
198     .bc                        = Exact_Euler,
199     .non_zero_time             = PETSC_FALSE,  // TODO: this needs to be true
200   },
201 };
202 
203 // PETSc user data
204 typedef struct User_ *User;
205 typedef struct Units_ *Units;
206 
207 struct User_ {
208   MPI_Comm comm;
209   PetscInt outputfreq;
210   DM dm;
211   DM dmviz;
212   Mat interpviz;
213   Ceed ceed;
214   Units units;
215   CeedVector qceed, qdotceed, gceed;
216   CeedOperator op_rhs_vol, op_rhs, op_ifunction_vol, op_ifunction;
217   Vec M;
218   char outputdir[PETSC_MAX_PATH_LEN];
219   PetscInt contsteps;
220 };
221 
222 struct Units_ {
223   // fundamental units
224   PetscScalar meter;
225   PetscScalar kilogram;
226   PetscScalar second;
227   PetscScalar Kelvin;
228   // derived units
229   PetscScalar Pascal;
230   PetscScalar JperkgK;
231   PetscScalar mpersquareds;
232   PetscScalar WpermK;
233   PetscScalar kgpercubicm;
234   PetscScalar kgpersquaredms;
235   PetscScalar Joulepercubicm;
236   PetscScalar Joule;
237 };
238 
239 typedef struct SimpleBC_ *SimpleBC;
240 struct SimpleBC_ {
241   PetscInt nwall, nslip[3];
242   PetscInt walls[6], slips[3][6];
243   PetscBool userbc;
244 };
245 
246 // Essential BC dofs are encoded in closure indices as -(i+1).
247 static PetscInt Involute(PetscInt i) {
248   return i >= 0 ? i : -(i+1);
249 }
250 
251 // Utility function to create local CEED restriction
252 static PetscErrorCode CreateRestrictionFromPlex(Ceed ceed, DM dm, CeedInt P,
253     CeedInt height, DMLabel domainLabel, CeedInt value,
254     CeedElemRestriction *Erestrict) {
255 
256   PetscSection section;
257   PetscInt p, Nelem, Ndof, *erestrict, eoffset, nfields, dim, depth;
258   DMLabel depthLabel;
259   IS depthIS, iterIS;
260   Vec Uloc;
261   const PetscInt *iterIndices;
262   PetscErrorCode ierr;
263 
264   PetscFunctionBeginUser;
265   ierr = DMGetDimension(dm, &dim); CHKERRQ(ierr);
266   dim -= height;
267   ierr = DMGetLocalSection(dm, &section); CHKERRQ(ierr);
268   ierr = PetscSectionGetNumFields(section, &nfields); CHKERRQ(ierr);
269   PetscInt ncomp[nfields], fieldoff[nfields+1];
270   fieldoff[0] = 0;
271   for (PetscInt f=0; f<nfields; f++) {
272     ierr = PetscSectionGetFieldComponents(section, f, &ncomp[f]); CHKERRQ(ierr);
273     fieldoff[f+1] = fieldoff[f] + ncomp[f];
274   }
275 
276   ierr = DMPlexGetDepth(dm, &depth); CHKERRQ(ierr);
277   ierr = DMPlexGetDepthLabel(dm, &depthLabel); CHKERRQ(ierr);
278   ierr = DMLabelGetStratumIS(depthLabel, depth - height, &depthIS); CHKERRQ(ierr);
279   if (domainLabel) {
280     IS domainIS;
281     ierr = DMLabelGetStratumIS(domainLabel, value, &domainIS); CHKERRQ(ierr);
282     if (domainIS) { // domainIS is non-empty
283       ierr = ISIntersect(depthIS, domainIS, &iterIS); CHKERRQ(ierr);
284       ierr = ISDestroy(&domainIS); CHKERRQ(ierr);
285     } else { // domainIS is NULL (empty)
286       iterIS = NULL;
287     }
288     ierr = ISDestroy(&depthIS); CHKERRQ(ierr);
289   } else {
290     iterIS = depthIS;
291   }
292   if (iterIS) {
293     ierr = ISGetLocalSize(iterIS, &Nelem); CHKERRQ(ierr);
294     ierr = ISGetIndices(iterIS, &iterIndices); CHKERRQ(ierr);
295   } else {
296     Nelem = 0;
297     iterIndices = NULL;
298   }
299   ierr = PetscMalloc1(Nelem*PetscPowInt(P, dim), &erestrict); CHKERRQ(ierr);
300   for (p=0,eoffset=0; p<Nelem; p++) {
301     PetscInt c = iterIndices[p];
302     PetscInt numindices, *indices, nnodes;
303     ierr = DMPlexGetClosureIndices(dm, section, section, c, PETSC_TRUE,
304                                    &numindices, &indices, NULL, NULL);
305     CHKERRQ(ierr);
306     bool flip = false;
307     if (height > 0) {
308       PetscInt numCells, numFaces, start = -1;
309       const PetscInt *orients, *faces, *cells;
310       ierr = DMPlexGetSupport(dm, c, &cells); CHKERRQ(ierr);
311       ierr = DMPlexGetSupportSize(dm, c, &numCells); CHKERRQ(ierr);
312       if (numCells != 1) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,
313                                     "Expected one cell in support of exterior face, but got %D cells",
314                                     numCells);
315       ierr = DMPlexGetCone(dm, cells[0], &faces); CHKERRQ(ierr);
316       ierr = DMPlexGetConeSize(dm, cells[0], &numFaces); CHKERRQ(ierr);
317       for (PetscInt i=0; i<numFaces; i++) {if (faces[i] == c) start = i;}
318       if (start < 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT,
319                                 "Could not find face %D in cone of its support",
320                                 c);
321       ierr = DMPlexGetConeOrientation(dm, cells[0], &orients); CHKERRQ(ierr);
322       if (orients[start] < 0) flip = true;
323     }
324     if (numindices % fieldoff[nfields]) SETERRQ1(PETSC_COMM_SELF,
325           PETSC_ERR_ARG_INCOMP, "Number of closure indices not compatible with Cell %D",
326           c);
327     nnodes = numindices / fieldoff[nfields];
328     for (PetscInt i=0; i<nnodes; i++) {
329       PetscInt ii = i;
330       if (flip) {
331         if (P == nnodes) ii = nnodes - 1 - i;
332         else if (P*P == nnodes) {
333           PetscInt row = i / P, col = i % P;
334           ii = row + col * P;
335         } else SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_SUP,
336                           "No support for flipping point with %D nodes != P (%D) or P^2",
337                           nnodes, P);
338       }
339       // Check that indices are blocked by node and thus can be coalesced as a single field with
340       // fieldoff[nfields] = sum(ncomp) components.
341       for (PetscInt f=0; f<nfields; f++) {
342         for (PetscInt j=0; j<ncomp[f]; j++) {
343           if (Involute(indices[fieldoff[f]*nnodes + ii*ncomp[f] + j])
344               != Involute(indices[ii*ncomp[0]]) + fieldoff[f] + j)
345             SETERRQ4(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,
346                      "Cell %D closure indices not interlaced for node %D field %D component %D",
347                      c, ii, f, j);
348         }
349       }
350       // Essential boundary conditions are encoded as -(loc+1), but we don't care so we decode.
351       PetscInt loc = Involute(indices[ii*ncomp[0]]);
352       erestrict[eoffset++] = loc;
353     }
354     ierr = DMPlexRestoreClosureIndices(dm, section, section, c, PETSC_TRUE,
355                                        &numindices, &indices, NULL, NULL);
356     CHKERRQ(ierr);
357   }
358   if (eoffset != Nelem*PetscPowInt(P, dim))
359     SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_LIB,
360              "ElemRestriction of size (%D,%D) initialized %D nodes", Nelem,
361              PetscPowInt(P, dim),eoffset);
362   if (iterIS) {
363     ierr = ISRestoreIndices(iterIS, &iterIndices); CHKERRQ(ierr);
364   }
365   ierr = ISDestroy(&iterIS); CHKERRQ(ierr);
366 
367   ierr = DMGetLocalVector(dm, &Uloc); CHKERRQ(ierr);
368   ierr = VecGetLocalSize(Uloc, &Ndof); CHKERRQ(ierr);
369   ierr = DMRestoreLocalVector(dm, &Uloc); CHKERRQ(ierr);
370   CeedElemRestrictionCreate(ceed, Nelem, PetscPowInt(P, dim), fieldoff[nfields],
371                             1, Ndof, CEED_MEM_HOST, CEED_COPY_VALUES, erestrict,
372                             Erestrict);
373   ierr = PetscFree(erestrict); CHKERRQ(ierr);
374   PetscFunctionReturn(0);
375 }
376 
377 // Utility function to get Ceed Restriction for each domain
378 static PetscErrorCode GetRestrictionForDomain(Ceed ceed, DM dm, CeedInt height,
379     DMLabel domainLabel, PetscInt value, CeedInt P, CeedInt Q, CeedInt qdatasize,
380     CeedElemRestriction *restrictq, CeedElemRestriction *restrictx,
381     CeedElemRestriction *restrictqdi) {
382 
383   DM dmcoord;
384   CeedInt dim, localNelem;
385   CeedInt Qdim;
386   PetscErrorCode ierr;
387 
388   PetscFunctionBeginUser;
389   ierr = DMGetDimension(dm, &dim); CHKERRQ(ierr);
390   dim -= height;
391   Qdim = CeedIntPow(Q, dim);
392   ierr = DMGetCoordinateDM(dm, &dmcoord); CHKERRQ(ierr);
393   ierr = DMPlexSetClosurePermutationTensor(dmcoord, PETSC_DETERMINE, NULL);
394   CHKERRQ(ierr);
395   ierr = CreateRestrictionFromPlex(ceed, dm, P, height, domainLabel, value,
396                                    restrictq);
397   CHKERRQ(ierr);
398   ierr = CreateRestrictionFromPlex(ceed, dmcoord, 2, height, domainLabel, value,
399                                    restrictx);
400   CHKERRQ(ierr);
401   CeedElemRestrictionGetNumElements(*restrictq, &localNelem);
402   CeedElemRestrictionCreateStrided(ceed, localNelem, Qdim,
403                                    qdatasize, qdatasize*localNelem*Qdim,
404                                    CEED_STRIDES_BACKEND, restrictqdi);
405   PetscFunctionReturn(0);
406 }
407 
408 // Utility function to create CEED Composite Operator for the entire domain
409 static PetscErrorCode CreateOperatorForDomain(Ceed ceed, DM dm, SimpleBC bc,
410     problemType problemChoice, WindType wind_type, CeedOperator op_applyVol,
411     CeedQFunction qf_applySur, CeedQFunction qf_setupSur,
412     CeedQFunction qf_applyIn, CeedQFunction qf_applyOut, CeedInt height,
413     CeedInt numP_Sur, CeedInt numQ_Sur, CeedInt qdatasizeSur, CeedInt NqptsSur,
414     CeedBasis basisxSur, CeedBasis basisqSur, CeedOperator *op_apply) {
415 
416   CeedInt dim, nFace;
417   PetscInt lsize;
418   Vec Xloc;
419   CeedVector xcorners;
420   DMLabel domainLabel;
421   PetscScalar *x;
422   PetscErrorCode ierr;
423 
424   PetscFunctionBeginUser;
425   // Composite Operaters
426   CeedCompositeOperatorCreate(ceed, op_apply);
427   // --Apply a Sub-Operator for the volume
428   CeedCompositeOperatorAddSub(*op_apply, op_applyVol);
429 
430   // Required data for in/outflow BCs
431   ierr = DMGetCoordinatesLocal(dm, &Xloc); CHKERRQ(ierr);
432   ierr = VecGetLocalSize(Xloc, &lsize); CHKERRQ(ierr);
433   ierr = CeedVectorCreate(ceed, lsize, &xcorners); CHKERRQ(ierr);
434   ierr = VecGetArray(Xloc, &x); CHKERRQ(ierr);
435   CeedVectorSetArray(xcorners, CEED_MEM_HOST, CEED_USE_POINTER, x);
436   ierr = DMGetLabel(dm, "Face Sets", &domainLabel); CHKERRQ(ierr);
437   ierr = DMGetDimension(dm, &dim); CHKERRQ(ierr);
438 
439   if (wind_type == ADVECTION_WIND_TRANSLATION ) {
440     // Ignore wall and slip BCs
441     bc->nwall = 0;
442     bc->nslip[0] = bc->nslip[1] = bc->nslip[2] = 0;
443 
444     // Set number of faces
445     if (dim == 2) nFace = 4;
446     if (dim == 3) nFace = 6;
447 
448     // Create CEED Operator for each boundary face
449     PetscInt localNelemSur[6];
450     CeedVector qdataSur[6];
451     CeedOperator op_setupSur[6], op_applySur[6];
452     CeedElemRestriction restrictxSur[6], restrictqSur[6], restrictqdiSur[6];
453     for (CeedInt i=0; i<nFace; i++) {
454       ierr = GetRestrictionForDomain(ceed, dm, height, domainLabel, i+1, numP_Sur,
455                                      numQ_Sur, qdatasizeSur, &restrictqSur[i],
456                                      &restrictxSur[i], &restrictqdiSur[i]);
457       CHKERRQ(ierr);
458       // Create the CEED vectors that will be needed in Boundary setup
459       CeedElemRestrictionGetNumElements(restrictqSur[i], &localNelemSur[i]);
460       CeedVectorCreate(ceed, qdatasizeSur*localNelemSur[i]*NqptsSur,
461                        &qdataSur[i]);
462       // Create the operator that builds the quadrature data for the Boundary operator
463       CeedOperatorCreate(ceed, qf_setupSur, NULL, NULL, &op_setupSur[i]);
464       CeedOperatorSetField(op_setupSur[i], "dx", restrictxSur[i], basisxSur,
465                            CEED_VECTOR_ACTIVE);
466       CeedOperatorSetField(op_setupSur[i], "weight", CEED_ELEMRESTRICTION_NONE,
467                            basisxSur, CEED_VECTOR_NONE);
468       CeedOperatorSetField(op_setupSur[i], "qdataSur", restrictqdiSur[i],
469                            CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
470       // Create Boundary operator
471       CeedOperatorCreate(ceed, qf_applySur, NULL, NULL, &op_applySur[i]);
472       CeedOperatorSetField(op_applySur[i], "q", restrictqSur[i], basisqSur,
473                            CEED_VECTOR_ACTIVE);
474       CeedOperatorSetField(op_applySur[i], "qdataSur", restrictqdiSur[i],
475                            CEED_BASIS_COLLOCATED, qdataSur[i]);
476       CeedOperatorSetField(op_applySur[i], "x", restrictxSur[i], basisxSur,
477                            xcorners);
478       CeedOperatorSetField(op_applySur[i], "v", restrictqSur[i], basisqSur,
479                            CEED_VECTOR_ACTIVE);
480       // Apply CEED operator for Boundary setup
481       CeedOperatorApply(op_setupSur[i], xcorners, qdataSur[i],
482                         CEED_REQUEST_IMMEDIATE);
483       // --Apply Sub-Operator for the Boundary
484       CeedCompositeOperatorAddSub(*op_apply, op_applySur[i]);
485     }
486     CeedVectorDestroy(&xcorners);
487   }
488 
489   if (problemChoice == NS_EULER_VORTEX) {
490     // All faces except for 5 and 6 are slip
491     bc->nwall = 0;
492     bc->nslip[0] = 0;
493     bc->nslip[1] = 2;     bc->slips[1][0] = 3; bc->slips[1][1] = 4;
494     bc->nslip[2] = 2;     bc->slips[2][0] = 1; bc->slips[2][1] = 2;
495 
496 
497     // Create CEED Operator for each boundary face
498     PetscInt localNelemSur[2];
499     CeedVector qdataSur[2];
500     CeedQFunction qf_apply;
501     CeedOperator op_setupSur[2], op_applySur[2];
502     CeedElemRestriction restrictxSur[2], restrictqSur[2], restrictqdiSur[2];
503     for (CeedInt i=0; i<2; i++) {
504       if (i == 0) qf_apply = qf_applyIn;
505       if (i == 1) qf_apply = qf_applyOut;
506       ierr = GetRestrictionForDomain(ceed, dm, height, domainLabel, i+5, numP_Sur,
507                                      numQ_Sur, qdatasizeSur, &restrictqSur[i],
508                                      &restrictxSur[i], &restrictqdiSur[i]);
509       CHKERRQ(ierr);
510       // Create the CEED vectors that will be needed in Boundary setup
511       CeedElemRestrictionGetNumElements(restrictqSur[i], &localNelemSur[i]);
512       CeedVectorCreate(ceed, qdatasizeSur*localNelemSur[i]*NqptsSur,
513                        &qdataSur[i]);
514       // Create the operator that builds the quadrature data for the Boundary operator
515       CeedOperatorCreate(ceed, qf_setupSur, NULL, NULL, &op_setupSur[i]);
516       CeedOperatorSetField(op_setupSur[i], "dx", restrictxSur[i], basisxSur,
517                            CEED_VECTOR_ACTIVE);
518       CeedOperatorSetField(op_setupSur[i], "weight", CEED_ELEMRESTRICTION_NONE,
519                            basisxSur, CEED_VECTOR_NONE);
520       CeedOperatorSetField(op_setupSur[i], "qdataSur", restrictqdiSur[i],
521                            CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
522       // Create Boundary operator
523       CeedOperatorCreate(ceed, qf_apply, NULL, NULL, &op_applySur[i]);
524       CeedOperatorSetField(op_applySur[i], "q", restrictqSur[i], basisqSur,
525                            CEED_VECTOR_ACTIVE);
526       CeedOperatorSetField(op_applySur[i], "qdataSur", restrictqdiSur[i],
527                            CEED_BASIS_COLLOCATED, qdataSur[i]);
528       CeedOperatorSetField(op_applySur[i], "x", restrictxSur[i], basisxSur,
529                            xcorners);
530       CeedOperatorSetField(op_applySur[i], "v", restrictqSur[i], basisqSur,
531                            CEED_VECTOR_ACTIVE);
532       // Apply CEED operator for Boundary setup
533       CeedOperatorApply(op_setupSur[i], xcorners, qdataSur[i],
534                         CEED_REQUEST_IMMEDIATE);
535       // --Apply Sub-Operator for the Boundary
536       CeedCompositeOperatorAddSub(*op_apply, op_applySur[i]);
537     }
538     CeedVectorDestroy(&xcorners);
539 
540   }
541   PetscFunctionReturn(0);
542 }
543 
544 static int CreateVectorFromPetscVec(Ceed ceed, Vec p, CeedVector *v) {
545   PetscErrorCode ierr;
546   PetscInt m;
547 
548   PetscFunctionBeginUser;
549   ierr = VecGetLocalSize(p, &m); CHKERRQ(ierr);
550   ierr = CeedVectorCreate(ceed, m, v); CHKERRQ(ierr);
551   PetscFunctionReturn(0);
552 }
553 
554 static int VectorPlacePetscVec(CeedVector c, Vec p) {
555   PetscErrorCode ierr;
556   PetscInt mceed, mpetsc;
557   PetscScalar *a;
558 
559   PetscFunctionBeginUser;
560   ierr = CeedVectorGetLength(c, &mceed); CHKERRQ(ierr);
561   ierr = VecGetLocalSize(p, &mpetsc); CHKERRQ(ierr);
562   if (mceed != mpetsc) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,
563                                   "Cannot place PETSc Vec of length %D in CeedVector of length %D",
564                                   mpetsc, mceed);
565   ierr = VecGetArray(p, &a); CHKERRQ(ierr);
566   CeedVectorSetArray(c, CEED_MEM_HOST, CEED_USE_POINTER, a);
567   PetscFunctionReturn(0);
568 }
569 
570 static PetscErrorCode DMPlexInsertBoundaryValues_NS(DM dm,
571     PetscBool insertEssential, Vec Qloc, PetscReal time, Vec faceGeomFVM,
572     Vec cellGeomFVM, Vec gradFVM) {
573   PetscErrorCode ierr;
574   Vec Qbc;
575 
576   PetscFunctionBegin;
577   ierr = DMGetNamedLocalVector(dm, "Qbc", &Qbc); CHKERRQ(ierr);
578   ierr = VecAXPY(Qloc, 1., Qbc); CHKERRQ(ierr);
579   ierr = DMRestoreNamedLocalVector(dm, "Qbc", &Qbc); CHKERRQ(ierr);
580   PetscFunctionReturn(0);
581 }
582 
583 // This is the RHS of the ODE, given as u_t = G(t,u)
584 // This function takes in a state vector Q and writes into G
585 static PetscErrorCode RHS_NS(TS ts, PetscReal t, Vec Q, Vec G, void *userData) {
586   PetscErrorCode ierr;
587   User user = *(User *)userData;
588   PetscScalar *q, *g;
589   Vec Qloc, Gloc;
590 
591   // Global-to-local
592   PetscFunctionBeginUser;
593   ierr = DMGetLocalVector(user->dm, &Qloc); CHKERRQ(ierr);
594   ierr = DMGetLocalVector(user->dm, &Gloc); CHKERRQ(ierr);
595   ierr = VecZeroEntries(Qloc); CHKERRQ(ierr);
596   ierr = DMGlobalToLocal(user->dm, Q, INSERT_VALUES, Qloc); CHKERRQ(ierr);
597   ierr = DMPlexInsertBoundaryValues(user->dm, PETSC_TRUE, Qloc, 0.0,
598                                     NULL, NULL, NULL); CHKERRQ(ierr);
599   ierr = VecZeroEntries(Gloc); CHKERRQ(ierr);
600 
601   // Ceed Vectors
602   ierr = VecGetArrayRead(Qloc, (const PetscScalar **)&q); CHKERRQ(ierr);
603   ierr = VecGetArray(Gloc, &g); CHKERRQ(ierr);
604   CeedVectorSetArray(user->qceed, CEED_MEM_HOST, CEED_USE_POINTER, q);
605   CeedVectorSetArray(user->gceed, CEED_MEM_HOST, CEED_USE_POINTER, g);
606 
607   // Apply CEED operator
608   CeedOperatorApply(user->op_rhs, user->qceed, user->gceed,
609                     CEED_REQUEST_IMMEDIATE);
610 
611   // Restore vectors
612   ierr = VecRestoreArrayRead(Qloc, (const PetscScalar **)&q); CHKERRQ(ierr);
613   ierr = VecRestoreArray(Gloc, &g); CHKERRQ(ierr);
614 
615   ierr = VecZeroEntries(G); CHKERRQ(ierr);
616   ierr = DMLocalToGlobal(user->dm, Gloc, ADD_VALUES, G); CHKERRQ(ierr);
617 
618   // Inverse of the lumped mass matrix
619   ierr = VecPointwiseMult(G, G, user->M); // M is Minv
620   CHKERRQ(ierr);
621 
622   ierr = DMRestoreLocalVector(user->dm, &Qloc); CHKERRQ(ierr);
623   ierr = DMRestoreLocalVector(user->dm, &Gloc); CHKERRQ(ierr);
624   PetscFunctionReturn(0);
625 }
626 
627 static PetscErrorCode IFunction_NS(TS ts, PetscReal t, Vec Q, Vec Qdot, Vec G,
628                                    void *userData) {
629   PetscErrorCode ierr;
630   User user = *(User *)userData;
631   const PetscScalar *q, *qdot;
632   PetscScalar *g;
633   Vec Qloc, Qdotloc, Gloc;
634 
635   // Global-to-local
636   PetscFunctionBeginUser;
637   ierr = DMGetLocalVector(user->dm, &Qloc); CHKERRQ(ierr);
638   ierr = DMGetLocalVector(user->dm, &Qdotloc); CHKERRQ(ierr);
639   ierr = DMGetLocalVector(user->dm, &Gloc); CHKERRQ(ierr);
640   ierr = VecZeroEntries(Qloc); CHKERRQ(ierr);
641   ierr = DMGlobalToLocal(user->dm, Q, INSERT_VALUES, Qloc); CHKERRQ(ierr);
642   ierr = DMPlexInsertBoundaryValues(user->dm, PETSC_TRUE, Qloc, 0.0,
643                                     NULL, NULL, NULL); CHKERRQ(ierr);
644   ierr = VecZeroEntries(Qdotloc); CHKERRQ(ierr);
645   ierr = DMGlobalToLocal(user->dm, Qdot, INSERT_VALUES, Qdotloc); CHKERRQ(ierr);
646   ierr = VecZeroEntries(Gloc); CHKERRQ(ierr);
647 
648   // Ceed Vectors
649   ierr = VecGetArrayRead(Qloc, &q); CHKERRQ(ierr);
650   ierr = VecGetArrayRead(Qdotloc, &qdot); CHKERRQ(ierr);
651   ierr = VecGetArray(Gloc, &g); CHKERRQ(ierr);
652   CeedVectorSetArray(user->qceed, CEED_MEM_HOST, CEED_USE_POINTER,
653                      (PetscScalar *)q);
654   CeedVectorSetArray(user->qdotceed, CEED_MEM_HOST, CEED_USE_POINTER,
655                      (PetscScalar *)qdot);
656   CeedVectorSetArray(user->gceed, CEED_MEM_HOST, CEED_USE_POINTER, g);
657 
658   // Apply CEED operator
659   CeedOperatorApply(user->op_ifunction, user->qceed, user->gceed,
660                     CEED_REQUEST_IMMEDIATE);
661 
662   // Restore vectors
663   ierr = VecRestoreArrayRead(Qloc, &q); CHKERRQ(ierr);
664   ierr = VecRestoreArrayRead(Qdotloc, &qdot); CHKERRQ(ierr);
665   ierr = VecRestoreArray(Gloc, &g); CHKERRQ(ierr);
666 
667   ierr = VecZeroEntries(G); CHKERRQ(ierr);
668   ierr = DMLocalToGlobal(user->dm, Gloc, ADD_VALUES, G); CHKERRQ(ierr);
669 
670   ierr = DMRestoreLocalVector(user->dm, &Qloc); CHKERRQ(ierr);
671   ierr = DMRestoreLocalVector(user->dm, &Qdotloc); CHKERRQ(ierr);
672   ierr = DMRestoreLocalVector(user->dm, &Gloc); CHKERRQ(ierr);
673   PetscFunctionReturn(0);
674 }
675 
676 // User provided TS Monitor
677 static PetscErrorCode TSMonitor_NS(TS ts, PetscInt stepno, PetscReal time,
678                                    Vec Q, void *ctx) {
679   User user = ctx;
680   Vec Qloc;
681   char filepath[PETSC_MAX_PATH_LEN];
682   PetscViewer viewer;
683   PetscErrorCode ierr;
684 
685   // Set up output
686   PetscFunctionBeginUser;
687   // Print every 'outputfreq' steps
688   if (stepno % user->outputfreq != 0)
689     PetscFunctionReturn(0);
690   ierr = DMGetLocalVector(user->dm, &Qloc); CHKERRQ(ierr);
691   ierr = PetscObjectSetName((PetscObject)Qloc, "StateVec"); CHKERRQ(ierr);
692   ierr = VecZeroEntries(Qloc); CHKERRQ(ierr);
693   ierr = DMGlobalToLocal(user->dm, Q, INSERT_VALUES, Qloc); CHKERRQ(ierr);
694 
695   // Output
696   ierr = PetscSNPrintf(filepath, sizeof filepath, "%s/ns-%03D.vtu",
697                        user->outputdir, stepno + user->contsteps);
698   CHKERRQ(ierr);
699   ierr = PetscViewerVTKOpen(PetscObjectComm((PetscObject)Q), filepath,
700                             FILE_MODE_WRITE, &viewer); CHKERRQ(ierr);
701   ierr = VecView(Qloc, viewer); CHKERRQ(ierr);
702   ierr = PetscViewerDestroy(&viewer); CHKERRQ(ierr);
703   if (user->dmviz) {
704     Vec Qrefined, Qrefined_loc;
705     char filepath_refined[PETSC_MAX_PATH_LEN];
706     PetscViewer viewer_refined;
707 
708     ierr = DMGetGlobalVector(user->dmviz, &Qrefined); CHKERRQ(ierr);
709     ierr = DMGetLocalVector(user->dmviz, &Qrefined_loc); CHKERRQ(ierr);
710     ierr = PetscObjectSetName((PetscObject)Qrefined_loc, "Refined");
711     CHKERRQ(ierr);
712     ierr = MatInterpolate(user->interpviz, Q, Qrefined); CHKERRQ(ierr);
713     ierr = VecZeroEntries(Qrefined_loc); CHKERRQ(ierr);
714     ierr = DMGlobalToLocal(user->dmviz, Qrefined, INSERT_VALUES, Qrefined_loc);
715     CHKERRQ(ierr);
716     ierr = PetscSNPrintf(filepath_refined, sizeof filepath_refined,
717                          "%s/nsrefined-%03D.vtu",
718                          user->outputdir, stepno + user->contsteps);
719     CHKERRQ(ierr);
720     ierr = PetscViewerVTKOpen(PetscObjectComm((PetscObject)Qrefined),
721                               filepath_refined,
722                               FILE_MODE_WRITE, &viewer_refined); CHKERRQ(ierr);
723     ierr = VecView(Qrefined_loc, viewer_refined); CHKERRQ(ierr);
724     ierr = DMRestoreLocalVector(user->dmviz, &Qrefined_loc); CHKERRQ(ierr);
725     ierr = DMRestoreGlobalVector(user->dmviz, &Qrefined); CHKERRQ(ierr);
726     ierr = PetscViewerDestroy(&viewer_refined); CHKERRQ(ierr);
727   }
728   ierr = DMRestoreLocalVector(user->dm, &Qloc); CHKERRQ(ierr);
729 
730   // Save data in a binary file for continuation of simulations
731   ierr = PetscSNPrintf(filepath, sizeof filepath, "%s/ns-solution.bin",
732                        user->outputdir); CHKERRQ(ierr);
733   ierr = PetscViewerBinaryOpen(user->comm, filepath, FILE_MODE_WRITE, &viewer);
734   CHKERRQ(ierr);
735   ierr = VecView(Q, viewer); CHKERRQ(ierr);
736   ierr = PetscViewerDestroy(&viewer); CHKERRQ(ierr);
737 
738   // Save time stamp
739   // Dimensionalize time back
740   time /= user->units->second;
741   ierr = PetscSNPrintf(filepath, sizeof filepath, "%s/ns-time.bin",
742                        user->outputdir); CHKERRQ(ierr);
743   ierr = PetscViewerBinaryOpen(user->comm, filepath, FILE_MODE_WRITE, &viewer);
744   CHKERRQ(ierr);
745   #if PETSC_VERSION_GE(3,13,0)
746   ierr = PetscViewerBinaryWrite(viewer, &time, 1, PETSC_REAL);
747   #else
748   ierr = PetscViewerBinaryWrite(viewer, &time, 1, PETSC_REAL, true);
749   #endif
750   CHKERRQ(ierr);
751   ierr = PetscViewerDestroy(&viewer); CHKERRQ(ierr);
752 
753   PetscFunctionReturn(0);
754 }
755 
756 static PetscErrorCode ICs_FixMultiplicity(CeedOperator op_ics,
757     CeedVector xcorners, CeedVector q0ceed, DM dm, Vec Qloc, Vec Q,
758     CeedElemRestriction restrictq, CeedQFunctionContext ctxSetup, CeedScalar time) {
759   PetscErrorCode ierr;
760   CeedVector multlvec;
761   Vec Multiplicity, MultiplicityLoc;
762 
763   SetupContext ctxSetupData;
764   CeedQFunctionContextGetData(ctxSetup, CEED_MEM_HOST, (void **)&ctxSetupData);
765   ctxSetupData->time = time;
766   CeedQFunctionContextRestoreData(ctxSetup, (void **)&ctxSetupData);
767 
768   ierr = VecZeroEntries(Qloc); CHKERRQ(ierr);
769   ierr = VectorPlacePetscVec(q0ceed, Qloc); CHKERRQ(ierr);
770   CeedOperatorApply(op_ics, xcorners, q0ceed, CEED_REQUEST_IMMEDIATE);
771   ierr = VecZeroEntries(Q); CHKERRQ(ierr);
772   ierr = DMLocalToGlobal(dm, Qloc, ADD_VALUES, Q); CHKERRQ(ierr);
773 
774   // Fix multiplicity for output of ICs
775   ierr = DMGetLocalVector(dm, &MultiplicityLoc); CHKERRQ(ierr);
776   CeedElemRestrictionCreateVector(restrictq, &multlvec, NULL);
777   ierr = VectorPlacePetscVec(multlvec, MultiplicityLoc); CHKERRQ(ierr);
778   CeedElemRestrictionGetMultiplicity(restrictq, multlvec);
779   CeedVectorDestroy(&multlvec);
780   ierr = DMGetGlobalVector(dm, &Multiplicity); CHKERRQ(ierr);
781   ierr = VecZeroEntries(Multiplicity); CHKERRQ(ierr);
782   ierr = DMLocalToGlobal(dm, MultiplicityLoc, ADD_VALUES, Multiplicity);
783   CHKERRQ(ierr);
784   ierr = VecPointwiseDivide(Q, Q, Multiplicity); CHKERRQ(ierr);
785   ierr = VecPointwiseDivide(Qloc, Qloc, MultiplicityLoc); CHKERRQ(ierr);
786   ierr = DMRestoreLocalVector(dm, &MultiplicityLoc); CHKERRQ(ierr);
787   ierr = DMRestoreGlobalVector(dm, &Multiplicity); CHKERRQ(ierr);
788 
789   PetscFunctionReturn(0);
790 }
791 
792 static PetscErrorCode ComputeLumpedMassMatrix(Ceed ceed, DM dm,
793     CeedElemRestriction restrictq, CeedBasis basisq,
794     CeedElemRestriction restrictqdi, CeedVector qdata, Vec M) {
795   PetscErrorCode ierr;
796   CeedQFunction qf_mass;
797   CeedOperator op_mass;
798   CeedVector mceed;
799   Vec Mloc;
800   CeedInt ncompq, qdatasize;
801 
802   PetscFunctionBeginUser;
803   CeedElemRestrictionGetNumComponents(restrictq, &ncompq);
804   CeedElemRestrictionGetNumComponents(restrictqdi, &qdatasize);
805   // Create the Q-function that defines the action of the mass operator
806   CeedQFunctionCreateInterior(ceed, 1, Mass, Mass_loc, &qf_mass);
807   CeedQFunctionAddInput(qf_mass, "q", ncompq, CEED_EVAL_INTERP);
808   CeedQFunctionAddInput(qf_mass, "qdata", qdatasize, CEED_EVAL_NONE);
809   CeedQFunctionAddOutput(qf_mass, "v", ncompq, CEED_EVAL_INTERP);
810 
811   // Create the mass operator
812   CeedOperatorCreate(ceed, qf_mass, NULL, NULL, &op_mass);
813   CeedOperatorSetField(op_mass, "q", restrictq, basisq, CEED_VECTOR_ACTIVE);
814   CeedOperatorSetField(op_mass, "qdata", restrictqdi,
815                        CEED_BASIS_COLLOCATED, qdata);
816   CeedOperatorSetField(op_mass, "v", restrictq, basisq, CEED_VECTOR_ACTIVE);
817 
818   ierr = DMGetLocalVector(dm, &Mloc); CHKERRQ(ierr);
819   ierr = VecZeroEntries(Mloc); CHKERRQ(ierr);
820   CeedElemRestrictionCreateVector(restrictq, &mceed, NULL);
821   ierr = VectorPlacePetscVec(mceed, Mloc); CHKERRQ(ierr);
822 
823   {
824     // Compute a lumped mass matrix
825     CeedVector onesvec;
826     CeedElemRestrictionCreateVector(restrictq, &onesvec, NULL);
827     CeedVectorSetValue(onesvec, 1.0);
828     CeedOperatorApply(op_mass, onesvec, mceed, CEED_REQUEST_IMMEDIATE);
829     CeedVectorDestroy(&onesvec);
830     CeedOperatorDestroy(&op_mass);
831     CeedVectorDestroy(&mceed);
832   }
833   CeedQFunctionDestroy(&qf_mass);
834 
835   ierr = VecZeroEntries(M); CHKERRQ(ierr);
836   ierr = DMLocalToGlobal(dm, Mloc, ADD_VALUES, M); CHKERRQ(ierr);
837   ierr = DMRestoreLocalVector(dm, &Mloc); CHKERRQ(ierr);
838 
839   // Invert diagonally lumped mass vector for RHS function
840   ierr = VecReciprocal(M); CHKERRQ(ierr);
841   PetscFunctionReturn(0);
842 }
843 
844 static PetscErrorCode SetUpDM(DM dm, problemData *problem, PetscInt degree,
845                               SimpleBC bc, void *ctxSetupData) {
846   PetscErrorCode ierr;
847 
848   PetscFunctionBeginUser;
849   {
850     // Configure the finite element space and boundary conditions
851     PetscFE fe;
852     PetscInt ncompq = 5;
853     ierr = PetscFECreateLagrange(PETSC_COMM_SELF, problem->dim, ncompq,
854                                  PETSC_FALSE, degree, PETSC_DECIDE,
855                                  &fe); CHKERRQ(ierr);
856     ierr = PetscObjectSetName((PetscObject)fe, "Q"); CHKERRQ(ierr);
857     ierr = DMAddField(dm, NULL,(PetscObject)fe); CHKERRQ(ierr);
858     ierr = DMCreateDS(dm); CHKERRQ(ierr);
859     {
860       PetscInt comps[1] = {1};
861       ierr = DMAddBoundary(dm, DM_BC_ESSENTIAL, "slipx", "Face Sets", 0,
862                            1, comps, (void(*)(void))NULL, NULL, bc->nslip[0],
863                            bc->slips[0], ctxSetupData); CHKERRQ(ierr);
864       comps[0] = 2;
865       ierr = DMAddBoundary(dm, DM_BC_ESSENTIAL, "slipy", "Face Sets", 0,
866                            1, comps, (void(*)(void))NULL, NULL, bc->nslip[1],
867                            bc->slips[1], ctxSetupData); CHKERRQ(ierr);
868       comps[0] = 3;
869       ierr = DMAddBoundary(dm, DM_BC_ESSENTIAL, "slipz", "Face Sets", 0,
870                            1, comps, (void(*)(void))NULL, NULL, bc->nslip[2],
871                            bc->slips[2], ctxSetupData); CHKERRQ(ierr);
872     }
873     if (bc->userbc == PETSC_TRUE) {
874       for (PetscInt c = 0; c < 3; c++) {
875         for (PetscInt s = 0; s < bc->nslip[c]; s++) {
876           for (PetscInt w = 0; w < bc->nwall; w++) {
877             if (bc->slips[c][s] == bc->walls[w])
878               SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG,
879                        "Boundary condition already set on face %D!\n",
880                        bc->walls[w]);
881 
882           }
883         }
884       }
885     }
886     // Wall boundary conditions are zero energy density and zero flux for
887     //   velocity in advection/advection2d, and zero velocity and zero flux
888     //   for mass density and energy density in density_current
889     {
890       if (problem->bc == Exact_Advection || problem->bc == Exact_Advection2d) {
891         PetscInt comps[1] = {4};
892         ierr = DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", "Face Sets", 0,
893                              1, comps, (void(*)(void))problem->bc, NULL,
894                              bc->nwall, bc->walls, ctxSetupData); CHKERRQ(ierr);
895       } else if (problem->bc == Exact_DC) {
896         PetscInt comps[3] = {1, 2, 3};
897         ierr = DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", "Face Sets", 0,
898                              3, comps, (void(*)(void))problem->bc, NULL,
899                              bc->nwall, bc->walls, ctxSetupData); CHKERRQ(ierr);
900       } else
901         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_NULL,
902                 "Undefined boundary conditions for this problem");
903     }
904     ierr = DMPlexSetClosurePermutationTensor(dm, PETSC_DETERMINE, NULL);
905     CHKERRQ(ierr);
906     ierr = PetscFEDestroy(&fe); CHKERRQ(ierr);
907   }
908   {
909     // Empty name for conserved field (because there is only one field)
910     PetscSection section;
911     ierr = DMGetLocalSection(dm, &section); CHKERRQ(ierr);
912     ierr = PetscSectionSetFieldName(section, 0, ""); CHKERRQ(ierr);
913     ierr = PetscSectionSetComponentName(section, 0, 0, "Density");
914     CHKERRQ(ierr);
915     ierr = PetscSectionSetComponentName(section, 0, 1, "MomentumX");
916     CHKERRQ(ierr);
917     ierr = PetscSectionSetComponentName(section, 0, 2, "MomentumY");
918     CHKERRQ(ierr);
919     ierr = PetscSectionSetComponentName(section, 0, 3, "MomentumZ");
920     CHKERRQ(ierr);
921     ierr = PetscSectionSetComponentName(section, 0, 4, "EnergyDensity");
922     CHKERRQ(ierr);
923   }
924   PetscFunctionReturn(0);
925 }
926 
927 int main(int argc, char **argv) {
928   PetscInt ierr;
929   MPI_Comm comm;
930   DM dm, dmcoord, dmviz;
931   Mat interpviz;
932   TS ts;
933   TSAdapt adapt;
934   User user;
935   Units units;
936   char ceedresource[4096] = "/cpu/self";
937   PetscInt localNelemVol, lnodes, gnodes, steps;
938   const PetscInt ncompq = 5;
939   PetscMPIInt rank;
940   PetscScalar ftime;
941   Vec Q, Qloc, Xloc;
942   Ceed ceed;
943   CeedInt numP, numQ;
944   CeedVector xcorners, qdata, q0ceed;
945   CeedBasis basisx, basisxc, basisq;
946   CeedElemRestriction restrictx, restrictq, restrictqdi;
947   CeedQFunction qf_setupVol, qf_ics, qf_rhsVol, qf_ifunctionVol;
948   CeedQFunctionContext ctxSetup, ctxNS, ctxAdvection2d, ctxSurface;
949   CeedOperator op_setupVol, op_ics;
950   CeedScalar Rd;
951   CeedMemType memtyperequested;
952   PetscScalar WpermK, Pascal, JperkgK, mpersquareds, kgpercubicm,
953               kgpersquaredms, Joulepercubicm, Joule;
954   problemType problemChoice;
955   problemData *problem = NULL;
956   WindType wind_type;
957   StabilizationType stab;
958   PetscBool implicit;
959   PetscInt    viz_refine = 0;
960   struct SimpleBC_ bc = {
961     .nslip = {2, 2, 2},
962     .slips = {{5, 6}, {3, 4}, {1, 2}}
963   };
964   double start, cpu_time_used;
965   // Test variables
966   PetscBool test;
967   PetscScalar testtol = 0.;
968   char filepath[PETSC_MAX_PATH_LEN];
969   // Check PETSc CUDA support
970   PetscBool petschavecuda, setmemtyperequest = PETSC_FALSE;
971   // *INDENT-OFF*
972   #ifdef PETSC_HAVE_CUDA
973   petschavecuda = PETSC_TRUE;
974   #else
975   petschavecuda = PETSC_FALSE;
976   #endif
977   // *INDENT-ON*
978 
979   // Create the libCEED contexts
980   PetscScalar meter          = 1e-2;     // 1 meter in scaled length units
981   PetscScalar second         = 1e-2;     // 1 second in scaled time units
982   PetscScalar kilogram       = 1e-6;     // 1 kilogram in scaled mass units
983   PetscScalar Kelvin         = 1;        // 1 Kelvin in scaled temperature units
984   CeedScalar theta0          = 300.;     // K
985   CeedScalar thetaC          = -15.;     // K
986   CeedScalar P0              = 1.e5;     // Pa
987   CeedScalar E_wind          = 1.e6;     // J
988   CeedScalar N               = 0.01;     // 1/s
989   CeedScalar cv              = 717.;     // J/(kg K)
990   CeedScalar cp              = 1004.;    // J/(kg K)
991   CeedScalar vortex_strength = 5.;       // -
992   CeedScalar g               = 9.81;     // m/s^2
993   CeedScalar lambda          = -2./3.;   // -
994   CeedScalar mu              = 75.;      // Pa s, dynamic viscosity
995   // mu = 75 is not physical for air, but is good for numerical stability
996   CeedScalar k               = 0.02638;  // W/(m K)
997   CeedScalar CtauS           = 0.;       // dimensionless
998   CeedScalar strong_form     = 0.;       // [0,1]
999   PetscScalar lx             = 8000.;    // m
1000   PetscScalar ly             = 8000.;    // m
1001   PetscScalar lz             = 4000.;    // m
1002   CeedScalar rc              = 1000.;    // m (Radius of bubble)
1003   PetscScalar resx           = 1000.;    // m (resolution in x)
1004   PetscScalar resy           = 1000.;    // m (resolution in y)
1005   PetscScalar resz           = 1000.;    // m (resolution in z)
1006   PetscInt outputfreq        = 10;       // -
1007   PetscInt contsteps         = 0;        // -
1008   PetscInt degree            = 1;        // -
1009   PetscInt qextra            = 2;        // -
1010   PetscInt qextraSur         = 2;        // -
1011   PetscReal center[3], dc_axis[3] = {0, 0, 0}, wind[3] = {1., 0, 0};
1012 
1013   ierr = PetscInitialize(&argc, &argv, NULL, help);
1014   if (ierr) return ierr;
1015 
1016   // Allocate PETSc context
1017   ierr = PetscCalloc1(1, &user); CHKERRQ(ierr);
1018   ierr = PetscMalloc1(1, &units); CHKERRQ(ierr);
1019 
1020   // Parse command line options
1021   comm = PETSC_COMM_WORLD;
1022   ierr = PetscOptionsBegin(comm, NULL, "Navier-Stokes in PETSc with libCEED",
1023                            NULL); CHKERRQ(ierr);
1024   ierr = PetscOptionsString("-ceed", "CEED resource specifier",
1025                             NULL, ceedresource, ceedresource,
1026                             sizeof(ceedresource), NULL); CHKERRQ(ierr);
1027   ierr = PetscOptionsBool("-test", "Run in test mode",
1028                           NULL, test=PETSC_FALSE, &test, NULL); CHKERRQ(ierr);
1029   ierr = PetscOptionsScalar("-compare_final_state_atol",
1030                             "Test absolute tolerance",
1031                             NULL, testtol, &testtol, NULL); CHKERRQ(ierr);
1032   ierr = PetscOptionsString("-compare_final_state_filename", "Test filename",
1033                             NULL, filepath, filepath,
1034                             sizeof(filepath), NULL); CHKERRQ(ierr);
1035   problemChoice = NS_DENSITY_CURRENT;
1036   ierr = PetscOptionsEnum("-problem", "Problem to solve", NULL,
1037                           problemTypes, (PetscEnum)problemChoice,
1038                           (PetscEnum *)&problemChoice, NULL); CHKERRQ(ierr);
1039   problem = &problemOptions[problemChoice];
1040   ierr = PetscOptionsEnum("-problem_advection_wind", "Wind type in Advection",
1041                           NULL, WindTypes,
1042                           (PetscEnum)(wind_type = ADVECTION_WIND_ROTATION),
1043                           (PetscEnum *)&wind_type, NULL); CHKERRQ(ierr);
1044   PetscInt n = problem->dim;
1045   PetscBool userWind;
1046   ierr = PetscOptionsRealArray("-problem_advection_wind_translation",
1047                                "Constant wind vector",
1048                                NULL, wind, &n, &userWind); CHKERRQ(ierr);
1049   if (wind_type == ADVECTION_WIND_ROTATION && userWind) {
1050     ierr = PetscPrintf(comm,
1051                        "Warning! Use -problem_advection_wind_translation only with -problem_advection_wind translation\n");
1052     CHKERRQ(ierr);
1053   }
1054   if (wind_type == ADVECTION_WIND_TRANSLATION        // TODO: add euler as well
1055       && problemChoice == NS_DENSITY_CURRENT) {
1056     SETERRQ(comm, PETSC_ERR_ARG_INCOMP,
1057             "-problem_advection_wind translation is not defined for -problem density_current");
1058   }
1059   ierr = PetscOptionsEnum("-stab", "Stabilization method", NULL,
1060                           StabilizationTypes, (PetscEnum)(stab = STAB_NONE),
1061                           (PetscEnum *)&stab, NULL); CHKERRQ(ierr);
1062   ierr = PetscOptionsBool("-implicit", "Use implicit (IFunction) formulation",
1063                           NULL, implicit=PETSC_FALSE, &implicit, NULL);
1064   CHKERRQ(ierr);
1065   if (!implicit && stab != STAB_NONE) {
1066     ierr = PetscPrintf(comm, "Warning! Use -stab only with -implicit\n");
1067     CHKERRQ(ierr);
1068   }
1069   {
1070     PetscInt len;
1071     PetscBool flg;
1072     ierr = PetscOptionsIntArray("-bc_wall",
1073                                 "Use wall boundary conditions on this list of faces",
1074                                 NULL, bc.walls,
1075                                 (len = sizeof(bc.walls) / sizeof(bc.walls[0]),
1076                                  &len), &flg); CHKERRQ(ierr);
1077     if (flg) {
1078       bc.nwall = len;
1079       // Using a no-slip wall disables automatic slip walls (they must be set explicitly)
1080       bc.nslip[0] = bc.nslip[1] = bc.nslip[2] = 0;
1081     }
1082     for (PetscInt j=0; j<3; j++) {
1083       const char *flags[3] = {"-bc_slip_x", "-bc_slip_y", "-bc_slip_z"};
1084       ierr = PetscOptionsIntArray(flags[j],
1085                                   "Use slip boundary conditions on this list of faces",
1086                                   NULL, bc.slips[j],
1087                                   (len = sizeof(bc.slips[j]) / sizeof(bc.slips[j][0]),
1088                                    &len), &flg);
1089       CHKERRQ(ierr);
1090       if (flg) {
1091         bc.nslip[j] = len;
1092         bc.userbc = PETSC_TRUE;
1093       }
1094     }
1095   }
1096   ierr = PetscOptionsInt("-viz_refine",
1097                          "Regular refinement levels for visualization",
1098                          NULL, viz_refine, &viz_refine, NULL);
1099   CHKERRQ(ierr);
1100   ierr = PetscOptionsScalar("-units_meter", "1 meter in scaled length units",
1101                             NULL, meter, &meter, NULL); CHKERRQ(ierr);
1102   meter = fabs(meter);
1103   ierr = PetscOptionsScalar("-units_second","1 second in scaled time units",
1104                             NULL, second, &second, NULL); CHKERRQ(ierr);
1105   second = fabs(second);
1106   ierr = PetscOptionsScalar("-units_kilogram","1 kilogram in scaled mass units",
1107                             NULL, kilogram, &kilogram, NULL); CHKERRQ(ierr);
1108   kilogram = fabs(kilogram);
1109   ierr = PetscOptionsScalar("-units_Kelvin",
1110                             "1 Kelvin in scaled temperature units",
1111                             NULL, Kelvin, &Kelvin, NULL); CHKERRQ(ierr);
1112   Kelvin = fabs(Kelvin);
1113   ierr = PetscOptionsScalar("-theta0", "Reference potential temperature",
1114                             NULL, theta0, &theta0, NULL); CHKERRQ(ierr);
1115   ierr = PetscOptionsScalar("-thetaC", "Perturbation of potential temperature",
1116                             NULL, thetaC, &thetaC, NULL); CHKERRQ(ierr);
1117   ierr = PetscOptionsScalar("-P0", "Atmospheric pressure",
1118                             NULL, P0, &P0, NULL); CHKERRQ(ierr);
1119   ierr = PetscOptionsScalar("-E_wind", "Total energy of inflow wind",
1120                             NULL, E_wind, &E_wind, NULL); CHKERRQ(ierr);
1121   ierr = PetscOptionsScalar("-N", "Brunt-Vaisala frequency",
1122                             NULL, N, &N, NULL); CHKERRQ(ierr);
1123   ierr = PetscOptionsScalar("-cv", "Heat capacity at constant volume",
1124                             NULL, cv, &cv, NULL); CHKERRQ(ierr);
1125   ierr = PetscOptionsScalar("-cp", "Heat capacity at constant pressure",
1126                             NULL, cp, &cp, NULL); CHKERRQ(ierr);
1127   PetscBool userVortex;
1128   ierr = PetscOptionsScalar("-vortex_strength", "Strength of Vortex",
1129                             NULL, vortex_strength, &vortex_strength, &userVortex);
1130   CHKERRQ(ierr);
1131   if (problemChoice != NS_EULER_VORTEX && userVortex) {
1132     ierr = PetscPrintf(comm,
1133                        "Warning! Use -vortex_strength only with -problem euler_vortex\n");
1134     CHKERRQ(ierr);
1135   }
1136   ierr = PetscOptionsScalar("-g", "Gravitational acceleration",
1137                             NULL, g, &g, NULL); CHKERRQ(ierr);
1138   ierr = PetscOptionsScalar("-lambda",
1139                             "Stokes hypothesis second viscosity coefficient",
1140                             NULL, lambda, &lambda, NULL); CHKERRQ(ierr);
1141   ierr = PetscOptionsScalar("-mu", "Shear dynamic viscosity coefficient",
1142                             NULL, mu, &mu, NULL); CHKERRQ(ierr);
1143   ierr = PetscOptionsScalar("-k", "Thermal conductivity",
1144                             NULL, k, &k, NULL); CHKERRQ(ierr);
1145   ierr = PetscOptionsScalar("-CtauS",
1146                             "Scale coefficient for tau (nondimensional)",
1147                             NULL, CtauS, &CtauS, NULL); CHKERRQ(ierr);
1148   if (stab == STAB_NONE && CtauS != 0) {
1149     ierr = PetscPrintf(comm,
1150                        "Warning! Use -CtauS only with -stab su or -stab supg\n");
1151     CHKERRQ(ierr);
1152   }
1153   ierr = PetscOptionsScalar("-strong_form",
1154                             "Strong (1) or weak/integrated by parts (0) advection residual",
1155                             NULL, strong_form, &strong_form, NULL);
1156   CHKERRQ(ierr);
1157   if (problemChoice == NS_DENSITY_CURRENT && (CtauS != 0 || strong_form != 0)) {
1158     ierr = PetscPrintf(comm,
1159                        "Warning! Problem density_current does not support -CtauS or -strong_form\n");
1160     CHKERRQ(ierr);
1161   }
1162   ierr = PetscOptionsScalar("-lx", "Length scale in x direction",
1163                             NULL, lx, &lx, NULL); CHKERRQ(ierr);
1164   ierr = PetscOptionsScalar("-ly", "Length scale in y direction",
1165                             NULL, ly, &ly, NULL); CHKERRQ(ierr);
1166   ierr = PetscOptionsScalar("-lz", "Length scale in z direction",
1167                             NULL, lz, &lz, NULL); CHKERRQ(ierr);
1168   ierr = PetscOptionsScalar("-rc", "Characteristic radius of thermal bubble",
1169                             NULL, rc, &rc, NULL); CHKERRQ(ierr);
1170   ierr = PetscOptionsScalar("-resx","Target resolution in x",
1171                             NULL, resx, &resx, NULL); CHKERRQ(ierr);
1172   ierr = PetscOptionsScalar("-resy","Target resolution in y",
1173                             NULL, resy, &resy, NULL); CHKERRQ(ierr);
1174   ierr = PetscOptionsScalar("-resz","Target resolution in z",
1175                             NULL, resz, &resz, NULL); CHKERRQ(ierr);
1176   n = problem->dim;
1177   center[0] = 0.5 * lx;
1178   center[1] = 0.5 * ly;
1179   center[2] = 0.5 * lz;
1180   ierr = PetscOptionsRealArray("-center", "Location of bubble center",
1181                                NULL, center, &n, NULL); CHKERRQ(ierr);
1182   n = problem->dim;
1183   ierr = PetscOptionsRealArray("-dc_axis",
1184                                "Axis of density current cylindrical anomaly, or {0,0,0} for spherically symmetric",
1185                                NULL, dc_axis, &n, NULL); CHKERRQ(ierr);
1186   {
1187     PetscReal norm = PetscSqrtReal(PetscSqr(dc_axis[0]) +
1188                                    PetscSqr(dc_axis[1]) + PetscSqr(dc_axis[2]));
1189     if (norm > 0) {
1190       for (int i=0; i<3; i++) dc_axis[i] /= norm;
1191     }
1192   }
1193   ierr = PetscOptionsInt("-output_freq",
1194                          "Frequency of output, in number of steps",
1195                          NULL, outputfreq, &outputfreq, NULL); CHKERRQ(ierr);
1196   ierr = PetscOptionsInt("-continue", "Continue from previous solution",
1197                          NULL, contsteps, &contsteps, NULL); CHKERRQ(ierr);
1198   ierr = PetscOptionsInt("-degree", "Polynomial degree of finite elements",
1199                          NULL, degree, &degree, NULL); CHKERRQ(ierr);
1200   ierr = PetscOptionsInt("-qextra", "Number of extra quadrature points",
1201                          NULL, qextra, &qextra, NULL); CHKERRQ(ierr);
1202   PetscBool userQextraSur;
1203   ierr = PetscOptionsInt("-qextra_boundary",
1204                          "Number of extra quadrature points on in/outflow faces",
1205                          NULL, qextraSur, &qextraSur, &userQextraSur);
1206   CHKERRQ(ierr);
1207   if ((wind_type == ADVECTION_WIND_ROTATION
1208        || problemChoice == NS_DENSITY_CURRENT) && userQextraSur) {
1209     ierr = PetscPrintf(comm,
1210                        "Warning! Use -qextra_boundary only with -problem_advection_wind translation\n");
1211     CHKERRQ(ierr);
1212   }
1213   ierr = PetscStrncpy(user->outputdir, ".", 2); CHKERRQ(ierr);
1214   ierr = PetscOptionsString("-output_dir", "Output directory",
1215                             NULL, user->outputdir, user->outputdir,
1216                             sizeof(user->outputdir), NULL); CHKERRQ(ierr);
1217   memtyperequested = petschavecuda ? CEED_MEM_DEVICE : CEED_MEM_HOST;
1218   ierr = PetscOptionsEnum("-memtype",
1219                           "CEED MemType requested", NULL,
1220                           memTypes, (PetscEnum)memtyperequested,
1221                           (PetscEnum *)&memtyperequested, &setmemtyperequest);
1222   CHKERRQ(ierr);
1223   ierr = PetscOptionsEnd(); CHKERRQ(ierr);
1224 
1225   // Define derived units
1226   Pascal = kilogram / (meter * PetscSqr(second));
1227   JperkgK =  PetscSqr(meter) / (PetscSqr(second) * Kelvin);
1228   mpersquareds = meter / PetscSqr(second);
1229   WpermK = kilogram * meter / (pow(second,3) * Kelvin);
1230   kgpercubicm = kilogram / pow(meter,3);
1231   kgpersquaredms = kilogram / (PetscSqr(meter) * second);
1232   Joulepercubicm = kilogram / (meter * PetscSqr(second));
1233   Joule = kilogram * PetscSqr(meter) / PetscSqr(second);
1234 
1235   // Scale variables to desired units
1236   theta0 *= Kelvin;
1237   thetaC *= Kelvin;
1238   P0 *= Pascal;
1239   E_wind *= Joule;
1240   N *= (1./second);
1241   cv *= JperkgK;
1242   cp *= JperkgK;
1243   Rd = cp - cv;
1244   g *= mpersquareds;
1245   mu *= Pascal * second;
1246   k *= WpermK;
1247   lx = fabs(lx) * meter;
1248   ly = fabs(ly) * meter;
1249   lz = fabs(lz) * meter;
1250   rc = fabs(rc) * meter;
1251   resx = fabs(resx) * meter;
1252   resy = fabs(resy) * meter;
1253   resz = fabs(resz) * meter;
1254   for (int i=0; i<3; i++) center[i] *= meter;
1255 
1256   const CeedInt dim = problem->dim, ncompx = problem->dim,
1257                 qdatasizeVol = problem->qdatasizeVol;
1258   // Set up the libCEED context
1259   struct SetupContext_ ctxSetupData = {
1260     .theta0 = theta0,
1261     .thetaC = thetaC,
1262     .P0 = P0,
1263     .N = N,
1264     .cv = cv,
1265     .cp = cp,
1266     .Rd = Rd,
1267     .g = g,
1268     .rc = rc,
1269     .lx = lx,
1270     .ly = ly,
1271     .lz = lz,
1272     .center[0] = center[0],
1273     .center[1] = center[1],
1274     .center[2] = center[2],
1275     .dc_axis[0] = dc_axis[0],
1276     .dc_axis[1] = dc_axis[1],
1277     .dc_axis[2] = dc_axis[2],
1278     .wind[0] = wind[0],
1279     .wind[1] = wind[1],
1280     .wind[2] = wind[2],
1281     .time = 0,
1282     .vortex_strength = vortex_strength,
1283     .wind_type = wind_type,
1284   };
1285 
1286   // Create the mesh
1287   {
1288     const PetscReal scale[3] = {lx, ly, lz};
1289     ierr = DMPlexCreateBoxMesh(comm, dim, PETSC_FALSE, NULL, NULL, scale,
1290                                NULL, PETSC_TRUE, &dm);
1291     CHKERRQ(ierr);
1292   }
1293 
1294   // Distribute the mesh over processes
1295   {
1296     DM               dmDist = NULL;
1297     PetscPartitioner part;
1298 
1299     ierr = DMPlexGetPartitioner(dm, &part); CHKERRQ(ierr);
1300     ierr = PetscPartitionerSetFromOptions(part); CHKERRQ(ierr);
1301     ierr = DMPlexDistribute(dm, 0, NULL, &dmDist); CHKERRQ(ierr);
1302     if (dmDist) {
1303       ierr = DMDestroy(&dm); CHKERRQ(ierr);
1304       dm  = dmDist;
1305     }
1306   }
1307   ierr = DMViewFromOptions(dm, NULL, "-dm_view"); CHKERRQ(ierr);
1308 
1309   // Setup DM
1310   ierr = DMLocalizeCoordinates(dm); CHKERRQ(ierr);
1311   ierr = DMSetFromOptions(dm); CHKERRQ(ierr);
1312   ierr = SetUpDM(dm, problem, degree, &bc, &ctxSetupData); CHKERRQ(ierr);
1313 
1314   // Refine DM for high-order viz
1315   dmviz = NULL;
1316   interpviz = NULL;
1317   if (viz_refine) {
1318     DM dmhierarchy[viz_refine+1];
1319 
1320     ierr = DMPlexSetRefinementUniform(dm, PETSC_TRUE); CHKERRQ(ierr);
1321     dmhierarchy[0] = dm;
1322     for (PetscInt i = 0, d = degree; i < viz_refine; i++) {
1323       Mat interp_next;
1324 
1325       ierr = DMRefine(dmhierarchy[i], MPI_COMM_NULL, &dmhierarchy[i+1]);
1326       CHKERRQ(ierr);
1327       ierr = DMClearDS(dmhierarchy[i+1]); CHKERRQ(ierr);
1328       ierr = DMClearFields(dmhierarchy[i+1]); CHKERRQ(ierr);
1329       ierr = DMSetCoarseDM(dmhierarchy[i+1], dmhierarchy[i]); CHKERRQ(ierr);
1330       d = (d + 1) / 2;
1331       if (i + 1 == viz_refine) d = 1;
1332       ierr = SetUpDM(dmhierarchy[i+1], problem, d, &bc, &ctxSetupData);
1333       CHKERRQ(ierr);
1334       ierr = DMCreateInterpolation(dmhierarchy[i], dmhierarchy[i+1],
1335                                    &interp_next, NULL); CHKERRQ(ierr);
1336       if (!i) interpviz = interp_next;
1337       else {
1338         Mat C;
1339         ierr = MatMatMult(interp_next, interpviz, MAT_INITIAL_MATRIX,
1340                           PETSC_DECIDE, &C); CHKERRQ(ierr);
1341         ierr = MatDestroy(&interp_next); CHKERRQ(ierr);
1342         ierr = MatDestroy(&interpviz); CHKERRQ(ierr);
1343         interpviz = C;
1344       }
1345     }
1346     for (PetscInt i=1; i<viz_refine; i++) {
1347       ierr = DMDestroy(&dmhierarchy[i]); CHKERRQ(ierr);
1348     }
1349     dmviz = dmhierarchy[viz_refine];
1350   }
1351   ierr = DMCreateGlobalVector(dm, &Q); CHKERRQ(ierr);
1352   ierr = DMGetLocalVector(dm, &Qloc); CHKERRQ(ierr);
1353   ierr = VecGetSize(Qloc, &lnodes); CHKERRQ(ierr);
1354   lnodes /= ncompq;
1355 
1356   // Initialize CEED
1357   CeedInit(ceedresource, &ceed);
1358   // Set memtype
1359   CeedMemType memtypebackend;
1360   CeedGetPreferredMemType(ceed, &memtypebackend);
1361   // Check memtype compatibility
1362   if (!setmemtyperequest)
1363     memtyperequested = memtypebackend;
1364   else if (!petschavecuda && memtyperequested == CEED_MEM_DEVICE)
1365     SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_SUP_SYS,
1366              "PETSc was not built with CUDA. "
1367              "Requested MemType CEED_MEM_DEVICE is not supported.", NULL);
1368 
1369   // Set number of 1D nodes and quadrature points
1370   numP = degree + 1;
1371   numQ = numP + qextra;
1372 
1373   // Print summary
1374   if (!test) {
1375     CeedInt gdofs, odofs;
1376     int comm_size;
1377     char box_faces_str[PETSC_MAX_PATH_LEN] = "NONE";
1378     ierr = VecGetSize(Q, &gdofs); CHKERRQ(ierr);
1379     ierr = VecGetLocalSize(Q, &odofs); CHKERRQ(ierr);
1380     gnodes = gdofs/ncompq;
1381     ierr = MPI_Comm_size(comm, &comm_size); CHKERRQ(ierr);
1382     ierr = PetscOptionsGetString(NULL, NULL, "-dm_plex_box_faces", box_faces_str,
1383                                  sizeof(box_faces_str), NULL); CHKERRQ(ierr);
1384     const char *usedresource;
1385     CeedGetResource(ceed, &usedresource);
1386 
1387     ierr = PetscPrintf(comm,
1388                        "\n-- Navier-Stokes solver - libCEED + PETSc --\n"
1389                        "  rank(s)                              : %d\n"
1390                        "  Problem:\n"
1391                        "    Problem Name                       : %s\n"
1392                        "    Stabilization                      : %s\n"
1393                        "  PETSc:\n"
1394                        "    Box Faces                          : %s\n"
1395                        "  libCEED:\n"
1396                        "    libCEED Backend                    : %s\n"
1397                        "    libCEED Backend MemType            : %s\n"
1398                        "    libCEED User Requested MemType     : %s\n"
1399                        "  Mesh:\n"
1400                        "    Number of 1D Basis Nodes (P)       : %d\n"
1401                        "    Number of 1D Quadrature Points (Q) : %d\n"
1402                        "    Global DoFs                        : %D\n"
1403                        "    Owned DoFs                         : %D\n"
1404                        "    DoFs per node                      : %D\n"
1405                        "    Global nodes                       : %D\n"
1406                        "    Owned nodes                        : %D\n",
1407                        comm_size, problemTypes[problemChoice],
1408                        StabilizationTypes[stab], box_faces_str, usedresource,
1409                        CeedMemTypes[memtypebackend],
1410                        (setmemtyperequest) ?
1411                        CeedMemTypes[memtyperequested] : "none",
1412                        numP, numQ, gdofs, odofs, ncompq, gnodes, lnodes);
1413     CHKERRQ(ierr);
1414   }
1415 
1416   // Set up global mass vector
1417   ierr = VecDuplicate(Q, &user->M); CHKERRQ(ierr);
1418 
1419   // Set up libCEED
1420   // CEED Bases
1421   CeedInit(ceedresource, &ceed);
1422   CeedBasisCreateTensorH1Lagrange(ceed, dim, ncompq, numP, numQ, CEED_GAUSS,
1423                                   &basisq);
1424   CeedBasisCreateTensorH1Lagrange(ceed, dim, ncompx, 2, numQ, CEED_GAUSS,
1425                                   &basisx);
1426   CeedBasisCreateTensorH1Lagrange(ceed, dim, ncompx, 2, numP,
1427                                   CEED_GAUSS_LOBATTO, &basisxc);
1428   ierr = DMGetCoordinateDM(dm, &dmcoord); CHKERRQ(ierr);
1429   ierr = DMPlexSetClosurePermutationTensor(dmcoord, PETSC_DETERMINE, NULL);
1430   CHKERRQ(ierr);
1431 
1432   // CEED Restrictions
1433   ierr = GetRestrictionForDomain(ceed, dm, 0, 0, 0, numP, numQ,
1434                                  qdatasizeVol, &restrictq, &restrictx,
1435                                  &restrictqdi); CHKERRQ(ierr);
1436 
1437   ierr = DMGetCoordinatesLocal(dm, &Xloc); CHKERRQ(ierr);
1438   ierr = CreateVectorFromPetscVec(ceed, Xloc, &xcorners); CHKERRQ(ierr);
1439 
1440   // Create the CEED vectors that will be needed in setup
1441   CeedInt NqptsVol;
1442   CeedBasisGetNumQuadraturePoints(basisq, &NqptsVol);
1443   CeedElemRestrictionGetNumElements(restrictq, &localNelemVol);
1444   CeedVectorCreate(ceed, qdatasizeVol*localNelemVol*NqptsVol, &qdata);
1445   CeedElemRestrictionCreateVector(restrictq, &q0ceed, NULL);
1446 
1447   // Create the Q-function that builds the quadrature data for the NS operator
1448   CeedQFunctionCreateInterior(ceed, 1, problem->setupVol, problem->setupVol_loc,
1449                               &qf_setupVol);
1450   CeedQFunctionAddInput(qf_setupVol, "dx", ncompx*dim, CEED_EVAL_GRAD);
1451   CeedQFunctionAddInput(qf_setupVol, "weight", 1, CEED_EVAL_WEIGHT);
1452   CeedQFunctionAddOutput(qf_setupVol, "qdata", qdatasizeVol, CEED_EVAL_NONE);
1453 
1454   // Create the Q-function that sets the ICs of the operator
1455   CeedQFunctionCreateInterior(ceed, 1, problem->ics, problem->ics_loc, &qf_ics);
1456   CeedQFunctionAddInput(qf_ics, "x", ncompx, CEED_EVAL_INTERP);
1457   CeedQFunctionAddOutput(qf_ics, "q0", ncompq, CEED_EVAL_NONE);
1458 
1459   qf_rhsVol = NULL;
1460   if (problem->applyVol_rhs) { // Create the Q-function that defines the action of the RHS operator
1461     CeedQFunctionCreateInterior(ceed, 1, problem->applyVol_rhs,
1462                                 problem->applyVol_rhs_loc, &qf_rhsVol);
1463     CeedQFunctionAddInput(qf_rhsVol, "q", ncompq, CEED_EVAL_INTERP);
1464     CeedQFunctionAddInput(qf_rhsVol, "dq", ncompq*dim, CEED_EVAL_GRAD);
1465     CeedQFunctionAddInput(qf_rhsVol, "qdata", qdatasizeVol, CEED_EVAL_NONE);
1466     CeedQFunctionAddInput(qf_rhsVol, "x", ncompx, CEED_EVAL_INTERP);
1467     CeedQFunctionAddOutput(qf_rhsVol, "v", ncompq, CEED_EVAL_INTERP);
1468     CeedQFunctionAddOutput(qf_rhsVol, "dv", ncompq*dim, CEED_EVAL_GRAD);
1469   }
1470 
1471   qf_ifunctionVol = NULL;
1472   if (problem->applyVol_ifunction) { // Create the Q-function that defines the action of the IFunction
1473     CeedQFunctionCreateInterior(ceed, 1, problem->applyVol_ifunction,
1474                                 problem->applyVol_ifunction_loc, &qf_ifunctionVol);
1475     CeedQFunctionAddInput(qf_ifunctionVol, "q", ncompq, CEED_EVAL_INTERP);
1476     CeedQFunctionAddInput(qf_ifunctionVol, "dq", ncompq*dim, CEED_EVAL_GRAD);
1477     CeedQFunctionAddInput(qf_ifunctionVol, "qdot", ncompq, CEED_EVAL_INTERP);
1478     CeedQFunctionAddInput(qf_ifunctionVol, "qdata", qdatasizeVol, CEED_EVAL_NONE);
1479     CeedQFunctionAddInput(qf_ifunctionVol, "x", ncompx, CEED_EVAL_INTERP);
1480     CeedQFunctionAddOutput(qf_ifunctionVol, "v", ncompq, CEED_EVAL_INTERP);
1481     CeedQFunctionAddOutput(qf_ifunctionVol, "dv", ncompq*dim, CEED_EVAL_GRAD);
1482   }
1483 
1484   // Create the operator that builds the quadrature data for the NS operator
1485   CeedOperatorCreate(ceed, qf_setupVol, NULL, NULL, &op_setupVol);
1486   CeedOperatorSetField(op_setupVol, "dx", restrictx, basisx, CEED_VECTOR_ACTIVE);
1487   CeedOperatorSetField(op_setupVol, "weight", CEED_ELEMRESTRICTION_NONE,
1488                        basisx, CEED_VECTOR_NONE);
1489   CeedOperatorSetField(op_setupVol, "qdata", restrictqdi,
1490                        CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
1491 
1492   // Create the operator that sets the ICs
1493   CeedOperatorCreate(ceed, qf_ics, NULL, NULL, &op_ics);
1494   CeedOperatorSetField(op_ics, "x", restrictx, basisxc, CEED_VECTOR_ACTIVE);
1495   CeedOperatorSetField(op_ics, "q0", restrictq,
1496                        CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
1497 
1498   CeedElemRestrictionCreateVector(restrictq, &user->qceed, NULL);
1499   CeedElemRestrictionCreateVector(restrictq, &user->qdotceed, NULL);
1500   CeedElemRestrictionCreateVector(restrictq, &user->gceed, NULL);
1501 
1502   if (qf_rhsVol) { // Create the RHS physics operator
1503     CeedOperator op;
1504     CeedOperatorCreate(ceed, qf_rhsVol, NULL, NULL, &op);
1505     CeedOperatorSetField(op, "q", restrictq, basisq, CEED_VECTOR_ACTIVE);
1506     CeedOperatorSetField(op, "dq", restrictq, basisq, CEED_VECTOR_ACTIVE);
1507     CeedOperatorSetField(op, "qdata", restrictqdi,
1508                          CEED_BASIS_COLLOCATED, qdata);
1509     CeedOperatorSetField(op, "x", restrictx, basisx, xcorners);
1510     CeedOperatorSetField(op, "v", restrictq, basisq, CEED_VECTOR_ACTIVE);
1511     CeedOperatorSetField(op, "dv", restrictq, basisq, CEED_VECTOR_ACTIVE);
1512     user->op_rhs_vol = op;
1513   }
1514 
1515   if (qf_ifunctionVol) { // Create the IFunction operator
1516     CeedOperator op;
1517     CeedOperatorCreate(ceed, qf_ifunctionVol, NULL, NULL, &op);
1518     CeedOperatorSetField(op, "q", restrictq, basisq, CEED_VECTOR_ACTIVE);
1519     CeedOperatorSetField(op, "dq", restrictq, basisq, CEED_VECTOR_ACTIVE);
1520     CeedOperatorSetField(op, "qdot", restrictq, basisq, user->qdotceed);
1521     CeedOperatorSetField(op, "qdata", restrictqdi,
1522                          CEED_BASIS_COLLOCATED, qdata);
1523     CeedOperatorSetField(op, "x", restrictx, basisx, xcorners);
1524     CeedOperatorSetField(op, "v", restrictq, basisq, CEED_VECTOR_ACTIVE);
1525     CeedOperatorSetField(op, "dv", restrictq, basisq, CEED_VECTOR_ACTIVE);
1526     user->op_ifunction_vol = op;
1527   }
1528 
1529   // Set up CEED for the boundaries
1530   CeedInt height = 1;
1531   CeedInt dimSur = dim - height;
1532   CeedInt numP_Sur = degree + 1;
1533   CeedInt numQ_Sur = numP_Sur + qextraSur;
1534   const CeedInt qdatasizeSur = problem->qdatasizeSur;
1535   CeedBasis basisxSur, basisxcSur, basisqSur;
1536   CeedInt NqptsSur;
1537   CeedQFunction qf_setupSur, qf_applySur, qf_applyIn, qf_applyOut;
1538 
1539   // CEED bases for the boundaries
1540   CeedBasisCreateTensorH1Lagrange(ceed, dimSur, ncompq, numP_Sur, numQ_Sur,
1541                                   CEED_GAUSS,
1542                                   &basisqSur);
1543   CeedBasisCreateTensorH1Lagrange(ceed, dimSur, ncompx, 2, numQ_Sur, CEED_GAUSS,
1544                                   &basisxSur);
1545   CeedBasisCreateTensorH1Lagrange(ceed, dimSur, ncompx, 2, numP_Sur,
1546                                   CEED_GAUSS_LOBATTO, &basisxcSur);
1547   CeedBasisGetNumQuadraturePoints(basisqSur, &NqptsSur);
1548 
1549   // Create the Q-function that builds the quadrature data for the Surface operator
1550   CeedQFunctionCreateInterior(ceed, 1, problem->setupSur, problem->setupSur_loc,
1551                               &qf_setupSur);
1552   CeedQFunctionAddInput(qf_setupSur, "dx", ncompx*dimSur, CEED_EVAL_GRAD);
1553   CeedQFunctionAddInput(qf_setupSur, "weight", 1, CEED_EVAL_WEIGHT);
1554   CeedQFunctionAddOutput(qf_setupSur, "qdataSur", qdatasizeSur, CEED_EVAL_NONE);
1555 
1556   // Creat Q-Function for Boundaries
1557   qf_applySur = NULL;
1558   if (problem->applySur) {
1559     CeedQFunctionCreateInterior(ceed, 1, problem->applySur,
1560                                 problem->applySur_loc, &qf_applySur);
1561     CeedQFunctionAddInput(qf_applySur, "q", ncompq, CEED_EVAL_INTERP);
1562     CeedQFunctionAddInput(qf_applySur, "qdataSur", qdatasizeSur, CEED_EVAL_NONE);
1563     CeedQFunctionAddInput(qf_applySur, "x", ncompx, CEED_EVAL_INTERP);
1564     CeedQFunctionAddOutput(qf_applySur, "v", ncompq, CEED_EVAL_INTERP);
1565   }
1566   qf_applyIn = NULL;
1567   if (problem->applyIn) {
1568     CeedQFunctionCreateInterior(ceed, 1, problem->applyIn,
1569                                 problem->applyIn_loc, &qf_applyIn);
1570     CeedQFunctionAddInput(qf_applyIn, "q", ncompq, CEED_EVAL_INTERP);
1571     CeedQFunctionAddInput(qf_applyIn, "qdataSur", qdatasizeSur, CEED_EVAL_NONE);
1572     CeedQFunctionAddInput(qf_applyIn, "x", ncompx, CEED_EVAL_INTERP);
1573     CeedQFunctionAddOutput(qf_applyIn, "v", ncompq, CEED_EVAL_INTERP);
1574   }
1575   qf_applyOut = NULL;
1576   if (problem->applyOut) {
1577     CeedQFunctionCreateInterior(ceed, 1, problem->applyOut,
1578                                 problem->applyOut_loc, &qf_applyOut);
1579     CeedQFunctionAddInput(qf_applyOut, "q", ncompq, CEED_EVAL_INTERP);
1580     CeedQFunctionAddInput(qf_applyOut, "qdataSur", qdatasizeSur, CEED_EVAL_NONE);
1581     CeedQFunctionAddInput(qf_applyOut, "x", ncompx, CEED_EVAL_INTERP);
1582     CeedQFunctionAddOutput(qf_applyOut, "v", ncompq, CEED_EVAL_INTERP);
1583   }
1584 
1585   // Create CEED Operator for the whole domain
1586   if (!implicit)
1587     ierr = CreateOperatorForDomain(ceed, dm, &bc, problemChoice, wind_type,
1588                                    user->op_rhs_vol,qf_applySur,
1589                                    qf_setupSur, qf_applyIn, qf_applyOut,
1590                                    height, numP_Sur, numQ_Sur,
1591                                    qdatasizeSur, NqptsSur, basisxSur,
1592                                    basisqSur, &user->op_rhs);
1593                                    CHKERRQ(ierr);
1594   if (implicit)
1595     ierr = CreateOperatorForDomain(ceed, dm, &bc, problemChoice, wind_type,
1596                                    user->op_ifunction_vol, qf_applySur,
1597                                    qf_setupSur, NULL, NULL,
1598                                    height, numP_Sur, numQ_Sur,
1599                                    qdatasizeSur, NqptsSur, basisxSur,
1600                                    basisqSur, &user->op_ifunction);
1601                                    CHKERRQ(ierr);
1602   // Set up contex for QFunctions
1603   CeedQFunctionContextCreate(ceed, &ctxSetup);
1604   CeedQFunctionContextSetData(ctxSetup, CEED_MEM_HOST, CEED_USE_POINTER,
1605                               sizeof ctxSetupData, &ctxSetupData);
1606   CeedQFunctionSetContext(qf_ics, ctxSetup);
1607 
1608   CeedScalar ctxNSData[8] = {lambda, mu, k, cv, cp, g, Rd};
1609   CeedQFunctionContextCreate(ceed, &ctxNS);
1610   CeedQFunctionContextSetData(ctxNS, CEED_MEM_HOST, CEED_USE_POINTER,
1611                               sizeof ctxNSData, &ctxNSData);
1612 
1613   struct Advection2dContext_ ctxAdvection2dData = {
1614     .CtauS = CtauS,
1615     .strong_form = strong_form,
1616     .stabilization = stab,
1617   };
1618   CeedQFunctionContextCreate(ceed, &ctxAdvection2d);
1619   CeedQFunctionContextSetData(ctxAdvection2d, CEED_MEM_HOST, CEED_USE_POINTER,
1620                               sizeof ctxAdvection2dData, &ctxAdvection2dData);
1621 
1622   struct SurfaceContext_ ctxSurfaceData = {
1623     .E_wind = E_wind,
1624     .strong_form = strong_form,
1625     .implicit = implicit,
1626   };
1627   CeedQFunctionContextCreate(ceed, &ctxSurface);
1628   CeedQFunctionContextSetData(ctxSurface, CEED_MEM_HOST, CEED_USE_POINTER,
1629                               sizeof ctxSurfaceData, &ctxSurfaceData);
1630 
1631   switch (problemChoice) {
1632   case NS_EULER_VORTEX:
1633   case NS_DENSITY_CURRENT:
1634     if (qf_rhsVol) CeedQFunctionSetContext(qf_rhsVol, ctxNS);
1635     if (qf_ifunctionVol) CeedQFunctionSetContext(qf_ifunctionVol, ctxNS);
1636     break;
1637   case NS_ADVECTION:
1638   case NS_ADVECTION2D:
1639     if (qf_rhsVol) CeedQFunctionSetContext(qf_rhsVol, ctxAdvection2d);
1640     if (qf_ifunctionVol) CeedQFunctionSetContext(qf_ifunctionVol, ctxAdvection2d);
1641     if (qf_applySur) CeedQFunctionSetContext(qf_applySur, ctxSurface);
1642   }
1643 
1644   // Set up PETSc context
1645   // Set up units structure
1646   units->meter = meter;
1647   units->kilogram = kilogram;
1648   units->second = second;
1649   units->Kelvin = Kelvin;
1650   units->Pascal = Pascal;
1651   units->JperkgK = JperkgK;
1652   units->mpersquareds = mpersquareds;
1653   units->WpermK = WpermK;
1654   units->kgpercubicm = kgpercubicm;
1655   units->kgpersquaredms = kgpersquaredms;
1656   units->Joulepercubicm = Joulepercubicm;
1657   units->Joule = Joule;
1658 
1659   // Set up user structure
1660   user->comm = comm;
1661   user->outputfreq = outputfreq;
1662   user->contsteps = contsteps;
1663   user->units = units;
1664   user->dm = dm;
1665   user->dmviz = dmviz;
1666   user->interpviz = interpviz;
1667   user->ceed = ceed;
1668 
1669   // Calculate qdata and ICs
1670   // Set up state global and local vectors
1671   ierr = VecZeroEntries(Q); CHKERRQ(ierr);
1672 
1673   ierr = VectorPlacePetscVec(q0ceed, Qloc); CHKERRQ(ierr);
1674 
1675   // Apply Setup Ceed Operators
1676   ierr = VectorPlacePetscVec(xcorners, Xloc); CHKERRQ(ierr);
1677   CeedOperatorApply(op_setupVol, xcorners, qdata, CEED_REQUEST_IMMEDIATE);
1678   ierr = ComputeLumpedMassMatrix(ceed, dm, restrictq, basisq, restrictqdi, qdata,
1679                                  user->M); CHKERRQ(ierr);
1680 
1681   ierr = ICs_FixMultiplicity(op_ics, xcorners, q0ceed, dm, Qloc, Q, restrictq,
1682                              ctxSetup, 0.0); CHKERRQ(ierr);
1683   if (1) { // Record boundary values from initial condition and override DMPlexInsertBoundaryValues()
1684     // We use this for the main simulation DM because the reference DMPlexInsertBoundaryValues() is very slow.  If we
1685     // disable this, we should still get the same results due to the problem->bc function, but with potentially much
1686     // slower execution.
1687     Vec Qbc;
1688     ierr = DMGetNamedLocalVector(dm, "Qbc", &Qbc); CHKERRQ(ierr);
1689     ierr = VecCopy(Qloc, Qbc); CHKERRQ(ierr);
1690     ierr = VecZeroEntries(Qloc); CHKERRQ(ierr);
1691     ierr = DMGlobalToLocal(dm, Q, INSERT_VALUES, Qloc); CHKERRQ(ierr);
1692     ierr = VecAXPY(Qbc, -1., Qloc); CHKERRQ(ierr);
1693     ierr = DMRestoreNamedLocalVector(dm, "Qbc", &Qbc); CHKERRQ(ierr);
1694     ierr = PetscObjectComposeFunction((PetscObject)dm,
1695                                       "DMPlexInsertBoundaryValues_C", DMPlexInsertBoundaryValues_NS);
1696     CHKERRQ(ierr);
1697   }
1698 
1699   MPI_Comm_rank(comm, &rank);
1700   if (!rank) {ierr = PetscMkdir(user->outputdir); CHKERRQ(ierr);}
1701   // Gather initial Q values
1702   // In case of continuation of simulation, set up initial values from binary file
1703   if (contsteps) { // continue from existent solution
1704     PetscViewer viewer;
1705     char filepath[PETSC_MAX_PATH_LEN];
1706     // Read input
1707     ierr = PetscSNPrintf(filepath, sizeof filepath, "%s/ns-solution.bin",
1708                          user->outputdir);
1709     CHKERRQ(ierr);
1710     ierr = PetscViewerBinaryOpen(comm, filepath, FILE_MODE_READ, &viewer);
1711     CHKERRQ(ierr);
1712     ierr = VecLoad(Q, viewer); CHKERRQ(ierr);
1713     ierr = PetscViewerDestroy(&viewer); CHKERRQ(ierr);
1714   }
1715   ierr = DMRestoreLocalVector(dm, &Qloc); CHKERRQ(ierr);
1716 
1717 // Create and setup TS
1718   ierr = TSCreate(comm, &ts); CHKERRQ(ierr);
1719   ierr = TSSetDM(ts, dm); CHKERRQ(ierr);
1720   if (implicit) {
1721     ierr = TSSetType(ts, TSBDF); CHKERRQ(ierr);
1722     if (user->op_ifunction) {
1723       ierr = TSSetIFunction(ts, NULL, IFunction_NS, &user); CHKERRQ(ierr);
1724     } else {                    // Implicit integrators can fall back to using an RHSFunction
1725       ierr = TSSetRHSFunction(ts, NULL, RHS_NS, &user); CHKERRQ(ierr);
1726     }
1727   } else {
1728     if (!user->op_rhs) SETERRQ(comm, PETSC_ERR_ARG_NULL,
1729                                  "Problem does not provide RHSFunction");
1730     ierr = TSSetType(ts, TSRK); CHKERRQ(ierr);
1731     ierr = TSRKSetType(ts, TSRK5F); CHKERRQ(ierr);
1732     ierr = TSSetRHSFunction(ts, NULL, RHS_NS, &user); CHKERRQ(ierr);
1733   }
1734   ierr = TSSetMaxTime(ts, 500. * units->second); CHKERRQ(ierr);
1735   ierr = TSSetExactFinalTime(ts, TS_EXACTFINALTIME_STEPOVER); CHKERRQ(ierr);
1736   ierr = TSSetTimeStep(ts, 1.e-2 * units->second); CHKERRQ(ierr);
1737   if (test) {ierr = TSSetMaxSteps(ts, 10); CHKERRQ(ierr);}
1738   ierr = TSGetAdapt(ts, &adapt); CHKERRQ(ierr);
1739   ierr = TSAdaptSetStepLimits(adapt,
1740                               1.e-12 * units->second,
1741                               1.e2 * units->second); CHKERRQ(ierr);
1742   ierr = TSSetFromOptions(ts); CHKERRQ(ierr);
1743   if (!contsteps) { // print initial condition
1744     if (!test) {
1745       ierr = TSMonitor_NS(ts, 0, 0., Q, user); CHKERRQ(ierr);
1746     }
1747   } else { // continue from time of last output
1748     PetscReal time;
1749     PetscInt count;
1750     PetscViewer viewer;
1751     char filepath[PETSC_MAX_PATH_LEN];
1752     ierr = PetscSNPrintf(filepath, sizeof filepath, "%s/ns-time.bin",
1753                          user->outputdir); CHKERRQ(ierr);
1754     ierr = PetscViewerBinaryOpen(comm, filepath, FILE_MODE_READ, &viewer);
1755     CHKERRQ(ierr);
1756     ierr = PetscViewerBinaryRead(viewer, &time, 1, &count, PETSC_REAL);
1757     CHKERRQ(ierr);
1758     ierr = PetscViewerDestroy(&viewer); CHKERRQ(ierr);
1759     ierr = TSSetTime(ts, time * user->units->second); CHKERRQ(ierr);
1760   }
1761   if (!test) {
1762     ierr = TSMonitorSet(ts, TSMonitor_NS, user, NULL); CHKERRQ(ierr);
1763   }
1764 
1765   // Solve
1766   start = MPI_Wtime();
1767   ierr = PetscBarrier((PetscObject)ts); CHKERRQ(ierr);
1768   ierr = TSSolve(ts, Q); CHKERRQ(ierr);
1769   cpu_time_used = MPI_Wtime() - start;
1770   ierr = TSGetSolveTime(ts, &ftime); CHKERRQ(ierr);
1771   ierr = MPI_Allreduce(MPI_IN_PLACE, &cpu_time_used, 1, MPI_DOUBLE, MPI_MIN,
1772                        comm); CHKERRQ(ierr);
1773   if (!test) {
1774     ierr = PetscPrintf(PETSC_COMM_WORLD,
1775                        "Time taken for solution (sec): %g\n",
1776                        (double)cpu_time_used); CHKERRQ(ierr);
1777   }
1778 
1779   // Get error
1780   if (problem->non_zero_time && !test) {
1781     Vec Qexact, Qexactloc;
1782     PetscReal norm;
1783     ierr = DMCreateGlobalVector(dm, &Qexact); CHKERRQ(ierr);
1784     ierr = DMGetLocalVector(dm, &Qexactloc); CHKERRQ(ierr);
1785     ierr = VecGetSize(Qexactloc, &lnodes); CHKERRQ(ierr);
1786 
1787     ierr = ICs_FixMultiplicity(op_ics, xcorners, q0ceed, dm, Qexactloc, Qexact,
1788                                restrictq, ctxSetup, ftime); CHKERRQ(ierr);
1789 
1790     ierr = VecAXPY(Q, -1.0, Qexact);  CHKERRQ(ierr);
1791     ierr = VecNorm(Q, NORM_MAX, &norm); CHKERRQ(ierr);
1792     CeedVectorDestroy(&q0ceed);
1793     ierr = PetscPrintf(PETSC_COMM_WORLD,
1794                        "Max Error: %g\n",
1795                        (double)norm); CHKERRQ(ierr);
1796     // Clean up vectors
1797     ierr = DMRestoreLocalVector(dm, &Qexactloc); CHKERRQ(ierr);
1798     ierr = VecDestroy(&Qexact); CHKERRQ(ierr);
1799   }
1800 
1801   // Output Statistics
1802   ierr = TSGetStepNumber(ts, &steps); CHKERRQ(ierr);
1803   if (!test) {
1804     ierr = PetscPrintf(PETSC_COMM_WORLD,
1805                        "Time integrator took %D time steps to reach final time %g\n",
1806                        steps, (double)ftime); CHKERRQ(ierr);
1807   }
1808   // Output numerical values from command line
1809   ierr = VecViewFromOptions(Q, NULL, "-vec_view"); CHKERRQ(ierr);
1810 
1811   // Compare reference solution values with current test run for CI
1812   if (test) {
1813     PetscViewer viewer;
1814     // Read reference file
1815     Vec Qref;
1816     PetscReal error, Qrefnorm;
1817     ierr = VecDuplicate(Q, &Qref); CHKERRQ(ierr);
1818     ierr = PetscViewerBinaryOpen(comm, filepath, FILE_MODE_READ, &viewer);
1819     CHKERRQ(ierr);
1820     ierr = VecLoad(Qref, viewer); CHKERRQ(ierr);
1821     ierr = PetscViewerDestroy(&viewer); CHKERRQ(ierr);
1822 
1823     // Compute error with respect to reference solution
1824     ierr = VecAXPY(Q, -1.0, Qref);  CHKERRQ(ierr);
1825     ierr = VecNorm(Qref, NORM_MAX, &Qrefnorm); CHKERRQ(ierr);
1826     ierr = VecScale(Q, 1./Qrefnorm); CHKERRQ(ierr);
1827     ierr = VecNorm(Q, NORM_MAX, &error); CHKERRQ(ierr);
1828     ierr = VecDestroy(&Qref); CHKERRQ(ierr);
1829     // Check error
1830     if (error > testtol) {
1831       ierr = PetscPrintf(PETSC_COMM_WORLD,
1832                          "Test failed with error norm %g\n",
1833                          (double)error); CHKERRQ(ierr);
1834     }
1835     ierr = PetscViewerDestroy(&viewer); CHKERRQ(ierr);
1836   }
1837 
1838   // Clean up libCEED
1839   CeedVectorDestroy(&qdata);
1840   CeedVectorDestroy(&user->qceed);
1841   CeedVectorDestroy(&user->qdotceed);
1842   CeedVectorDestroy(&user->gceed);
1843   CeedVectorDestroy(&xcorners);
1844   CeedBasisDestroy(&basisq);
1845   CeedBasisDestroy(&basisx);
1846   CeedBasisDestroy(&basisxc);
1847   CeedElemRestrictionDestroy(&restrictq);
1848   CeedElemRestrictionDestroy(&restrictx);
1849   CeedElemRestrictionDestroy(&restrictqdi);
1850   CeedQFunctionDestroy(&qf_setupVol);
1851   CeedQFunctionDestroy(&qf_ics);
1852   CeedQFunctionDestroy(&qf_rhsVol);
1853   CeedQFunctionDestroy(&qf_ifunctionVol);
1854   CeedQFunctionContextDestroy(&ctxSetup);
1855   CeedQFunctionContextDestroy(&ctxNS);
1856   CeedQFunctionContextDestroy(&ctxAdvection2d);
1857   CeedQFunctionContextDestroy(&ctxSurface);
1858   CeedOperatorDestroy(&op_setupVol);
1859   CeedOperatorDestroy(&op_ics);
1860   CeedOperatorDestroy(&user->op_rhs_vol);
1861   CeedOperatorDestroy(&user->op_ifunction_vol);
1862   CeedDestroy(&ceed);
1863   CeedBasisDestroy(&basisqSur);
1864   CeedBasisDestroy(&basisxSur);
1865   CeedBasisDestroy(&basisxcSur);
1866   CeedQFunctionDestroy(&qf_setupSur);
1867   CeedQFunctionDestroy(&qf_applySur);
1868   CeedOperatorDestroy(&user->op_rhs);
1869   CeedOperatorDestroy(&user->op_ifunction);
1870 
1871   // Clean up PETSc
1872   ierr = VecDestroy(&Q); CHKERRQ(ierr);
1873   ierr = VecDestroy(&user->M); CHKERRQ(ierr);
1874   ierr = MatDestroy(&interpviz); CHKERRQ(ierr);
1875   ierr = DMDestroy(&dmviz); CHKERRQ(ierr);
1876   ierr = TSDestroy(&ts); CHKERRQ(ierr);
1877   ierr = DMDestroy(&dm); CHKERRQ(ierr);
1878   ierr = PetscFree(units); CHKERRQ(ierr);
1879   ierr = PetscFree(user); CHKERRQ(ierr);
1880   return PetscFinalize();
1881 }
1882