xref: /honee/qfunctions/newtonian.h (revision bb8a0c61f21224cefcdd60e71004bb99df1e9a58)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 /// @file
9 /// Operator for Navier-Stokes example using PETSc
10 
11 
12 #ifndef newtonian_h
13 #define newtonian_h
14 
15 #include <math.h>
16 #include <ceed.h>
17 
18 #ifndef M_PI
19 #define M_PI    3.14159265358979323846
20 #endif
21 
22 #ifndef setup_context_struct
23 #define setup_context_struct
24 typedef struct SetupContext_ *SetupContext;
25 struct SetupContext_ {
26   CeedScalar theta0;
27   CeedScalar thetaC;
28   CeedScalar P0;
29   CeedScalar N;
30   CeedScalar cv;
31   CeedScalar cp;
32   CeedScalar g[3];
33   CeedScalar rc;
34   CeedScalar lx;
35   CeedScalar ly;
36   CeedScalar lz;
37   CeedScalar center[3];
38   CeedScalar dc_axis[3];
39   CeedScalar wind[3];
40   CeedScalar time;
41   int wind_type;              // See WindType: 0=ROTATION, 1=TRANSLATION
42   int bubble_type;            // See BubbleType: 0=SPHERE, 1=CYLINDER
43   int bubble_continuity_type; // See BubbleContinuityType: 0=SMOOTH, 1=BACK_SHARP 2=THICK
44 };
45 #endif
46 
47 #ifndef newtonian_context_struct
48 #define newtonian_context_struct
49 typedef enum {
50   STAB_NONE = 0,
51   STAB_SU   = 1, // Streamline Upwind
52   STAB_SUPG = 2, // Streamline Upwind Petrov-Galerkin
53 } StabilizationType;
54 
55 typedef struct NewtonianIdealGasContext_ *NewtonianIdealGasContext;
56 struct NewtonianIdealGasContext_ {
57   CeedScalar lambda;
58   CeedScalar mu;
59   CeedScalar k;
60   CeedScalar cv;
61   CeedScalar cp;
62   CeedScalar g[3];
63   CeedScalar c_tau;
64   CeedScalar Ctau_t;
65   CeedScalar Ctau_v;
66   CeedScalar Ctau_C;
67   CeedScalar Ctau_M;
68   CeedScalar Ctau_E;
69   CeedScalar dt;
70   StabilizationType stabilization;
71 };
72 #endif
73 
74 // *****************************************************************************
75 // Helper function for computing flux Jacobian
76 // *****************************************************************************
77 CEED_QFUNCTION_HELPER void computeFluxJacobian_NS(CeedScalar dF[3][5][5],
78     const CeedScalar rho, const CeedScalar u[3], const CeedScalar E,
79     const CeedScalar gamma, const CeedScalar g[3], const CeedScalar x[3]) {
80   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; // Velocity square
81   CeedScalar e_potential = -(g[0]*x[0] + g[1]*x[1] + g[2]*x[2]);
82   for (CeedInt i=0; i<3; i++) { // Jacobian matrices for 3 directions
83     for (CeedInt j=0; j<3; j++) { // Rows of each Jacobian matrix
84       dF[i][j+1][0] = ((i==j) ? ((gamma-1.)*(u_sq/2. - e_potential)) : 0.) -
85                       u[i]*u[j];
86       for (CeedInt k=0; k<3; k++) { // Columns of each Jacobian matrix
87         dF[i][0][k+1]   = ((i==k) ? 1. : 0.);
88         dF[i][j+1][k+1] = ((j==k) ? u[i] : 0.) +
89                           ((i==k) ? u[j] : 0.) -
90                           ((i==j) ? u[k] : 0.) * (gamma-1.);
91         dF[i][4][k+1]   = ((i==k) ? (E*gamma/rho - (gamma-1.)*u_sq/2.) : 0.) -
92                           (gamma-1.)*u[i]*u[k];
93       }
94       dF[i][j+1][4] = ((i==j) ? (gamma-1.) : 0.);
95     }
96     dF[i][4][0] = u[i] * ((gamma-1.)*u_sq - E*gamma/rho);
97     dF[i][4][4] = u[i] * gamma;
98   }
99 }
100 
101 // *****************************************************************************
102 // Helper function for computing flux Jacobian of Primitive variables
103 // *****************************************************************************
104 CEED_QFUNCTION_HELPER void computeFluxJacobian_NSp(CeedScalar dF[3][5][5],
105     const CeedScalar rho, const CeedScalar u[3], const CeedScalar E,
106     const CeedScalar Rd, const CeedScalar cv) {
107   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; // Velocity square
108   // TODO Add in gravity's contribution
109 
110   CeedScalar T    = ( E / rho - u_sq / 2. ) / cv;
111   CeedScalar drdT = -rho / T;
112   CeedScalar drdP = 1. / ( Rd * T);
113   CeedScalar etot =  E / rho ;
114   CeedScalar e2p  = drdP * etot + 1. ;
115   CeedScalar e3p  = ( E  + rho * Rd * T );
116   CeedScalar e4p  = drdT * etot + rho * cv ;
117 
118   for (CeedInt i=0; i<3; i++) { // Jacobian matrices for 3 directions
119     for (CeedInt j=0; j<3; j++) { // j counts F^{m_j}
120 //        [row][col] of A_i
121       dF[i][j+1][0] = drdP * u[i] * u[j] + ((i==j) ? 1. : 0.); // F^{{m_j} wrt p
122       for (CeedInt k=0; k<3; k++) { // k counts the wrt vel_k
123         // this loop handles middle columns for all 5 rows
124         dF[i][0][k+1]   =  ((i==k) ? rho  : 0.);   // F^c wrt vel_k
125         dF[i][j+1][k+1] = (((j==k) ? u[i] : 0.) +  // F^m_j wrt u_k
126                            ((i==k) ? u[j] : 0.) ) * rho;
127         dF[i][4][k+1]   = rho * u[i] * u[k]
128                           + ((i==k) ? e3p  : 0.) ; // F^e wrt u_k
129       }
130       dF[i][j+1][4] = drdT * u[i] * u[j]; // F^{m_j} wrt T
131     }
132     dF[i][4][0] = u[i] * e2p; // F^e wrt p
133     dF[i][4][4] = u[i] * e4p; // F^e wrt T
134     dF[i][0][0] = u[i] * drdP; // F^c wrt p
135     dF[i][0][4] = u[i] * drdT; // F^c wrt T
136   }
137 }
138 
139 CEED_QFUNCTION_HELPER void PrimitiveToConservative_fwd(const CeedScalar rho,
140     const CeedScalar u[3], const CeedScalar E, const CeedScalar Rd,
141     const CeedScalar cv, const CeedScalar dY[5], CeedScalar dU[5]) {
142   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2];
143   CeedScalar T    = ( E / rho - u_sq / 2. ) / cv;
144   CeedScalar drdT = -rho / T;
145   CeedScalar drdP = 1. / ( Rd * T);
146   dU[0] = drdP * dY[0] + drdT * dY[4];
147   CeedScalar de_kinetic = 0;
148   for (int i=0; i<3; i++) {
149     dU[1+i] = dU[0] * u[i] + rho * dY[1+i];
150     de_kinetic += u[i] * dY[1+i];
151   }
152   dU[4] = rho * cv * dY[4] + dU[0] * cv * T // internal energy: rho * e
153           + rho * de_kinetic + .5 * dU[0] * u_sq; // kinetic energy: .5 * rho * |u|^2
154 }
155 
156 // *****************************************************************************
157 // Helper function for computing Tau elements (stabilization constant)
158 //   Model from:
159 //     PHASTA
160 //
161 //   Tau[i] = itau=0 which is diagonal-Shakib (3 values still but not spatial)
162 //
163 // Where NOT UPDATED YET
164 // *****************************************************************************
165 CEED_QFUNCTION_HELPER void Tau_diagPrim(CeedScalar Tau_d[3],
166                                         const CeedScalar dXdx[3][3], const CeedScalar u[3],
167                                         const CeedScalar cv, const NewtonianIdealGasContext newt_ctx,
168                                         const CeedScalar mu, const CeedScalar dt,
169                                         const CeedScalar rho) {
170   // Context
171   const CeedScalar Ctau_t = newt_ctx->Ctau_t;
172   const CeedScalar Ctau_v = newt_ctx->Ctau_v;
173   const CeedScalar Ctau_C = newt_ctx->Ctau_C;
174   const CeedScalar Ctau_M = newt_ctx->Ctau_M;
175   const CeedScalar Ctau_E = newt_ctx->Ctau_E;
176   CeedScalar gijd[6];
177   CeedScalar tau;
178   CeedScalar dts;
179   CeedScalar fact;
180 
181   //*INDENT-OFF*
182   gijd[0] =   dXdx[0][0] * dXdx[0][0]
183             + dXdx[1][0] * dXdx[1][0]
184             + dXdx[2][0] * dXdx[2][0];
185 
186   gijd[1] =   dXdx[0][0] * dXdx[0][1]
187             + dXdx[1][0] * dXdx[1][1]
188             + dXdx[2][0] * dXdx[2][1];
189 
190   gijd[2] =   dXdx[0][1] * dXdx[0][1]
191             + dXdx[1][1] * dXdx[1][1]
192             + dXdx[2][1] * dXdx[2][1];
193 
194   gijd[3] =   dXdx[0][0] * dXdx[0][2]
195             + dXdx[1][0] * dXdx[1][2]
196             + dXdx[2][0] * dXdx[2][2];
197 
198   gijd[4] =   dXdx[0][1] * dXdx[0][2]
199             + dXdx[1][1] * dXdx[1][2]
200             + dXdx[2][1] * dXdx[2][2];
201 
202   gijd[5] =   dXdx[0][2] * dXdx[0][2]
203             + dXdx[1][2] * dXdx[1][2]
204             + dXdx[2][2] * dXdx[2][2];
205   //*INDENT-ON*
206 
207   dts = Ctau_t / dt ;
208 
209   tau = rho*rho*((4. * dts * dts)
210                  + u[0] * ( u[0] * gijd[0] + 2. * ( u[1] * gijd[1] + u[2] * gijd[3]))
211                  + u[1] * ( u[1] * gijd[2] + 2. *   u[2] * gijd[4])
212                  + u[2] *   u[2] * gijd[5])
213         + Ctau_v* mu * mu *
214         (gijd[0]*gijd[0] + gijd[2]*gijd[2] + gijd[5]*gijd[5] +
215          + 2. * (gijd[1]*gijd[1] + gijd[3]*gijd[3] + gijd[4]*gijd[4]));
216 
217   fact=sqrt(tau);
218 
219   Tau_d[0] = Ctau_C * fact / (rho*(gijd[0] + gijd[2] + gijd[5]))*0.125;
220 
221   Tau_d[1] = Ctau_M / fact;
222   Tau_d[2] = Ctau_E / ( fact * cv );
223 
224 // consider putting back the way I initially had it  Ctau_E * Tau_d[1] /cv
225 //  to avoid a division if the compiler is smart enough to see that cv IS
226 // a constant that it could invert once for all elements
227 // but in that case energy tau is scaled by the product of Ctau_E * Ctau_M
228 // OR we could absorb cv into Ctau_E but this puts more burden on user to
229 // know how to change constants with a change of fluid or units.  Same for
230 // Ctau_v * mu * mu IF AND ONLY IF we don't add viscosity law =f(T)
231 }
232 
233 // *****************************************************************************
234 // Helper function for computing Tau elements (stabilization constant)
235 //   Model from:
236 //     Stabilized Methods for Compressible Flows, Hughes et al 2010
237 //
238 //   Spatial criterion #2 - Tau is a 3x3 diagonal matrix
239 //   Tau[i] = c_tau h[i] Xi(Pe) / rho(A[i]) (no sum)
240 //
241 // Where
242 //   c_tau     = stabilization constant (0.5 is reported as "optimal")
243 //   h[i]      = 2 length(dxdX[i])
244 //   Pe        = Peclet number ( Pe = sqrt(u u) / dot(dXdx,u) diffusivity )
245 //   Xi(Pe)    = coth Pe - 1. / Pe (1. at large local Peclet number )
246 //   rho(A[i]) = spectral radius of the convective flux Jacobian i,
247 //               wave speed in direction i
248 // *****************************************************************************
249 CEED_QFUNCTION_HELPER void Tau_spatial(CeedScalar Tau_x[3],
250                                        const CeedScalar dXdx[3][3], const CeedScalar u[3],
251                                        /* const CeedScalar sound_speed, const CeedScalar c_tau) { */
252                                        const CeedScalar sound_speed, const CeedScalar c_tau,
253                                        const CeedScalar viscosity) {
254   const CeedScalar mag_u_visc = sqrt(u[0]*u[0] +u[1]*u[1] +u[2]*u[2]) /
255                                 (2*viscosity);
256   for (int i=0; i<3; i++) {
257     // length of element in direction i
258     CeedScalar h = 2 / sqrt(dXdx[0][i]*dXdx[0][i] + dXdx[1][i]*dXdx[1][i] +
259                             dXdx[2][i]*dXdx[2][i]);
260     CeedScalar Pe = mag_u_visc*h;
261     CeedScalar Xi = 1/tanh(Pe) - 1/Pe;
262     // fastest wave in direction i
263     CeedScalar fastest_wave = fabs(u[i]) + sound_speed;
264     Tau_x[i] = c_tau * h * Xi / fastest_wave;
265   }
266 }
267 
268 // *****************************************************************************
269 // This QFunction sets a "still" initial condition for generic Newtonian IG problems
270 // *****************************************************************************
271 CEED_QFUNCTION(ICsNewtonianIG)(void *ctx, CeedInt Q,
272                                const CeedScalar *const *in, CeedScalar *const *out) {
273   // Inputs
274   const CeedScalar (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0];
275 
276   // Outputs
277   CeedScalar (*q0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
278 
279   // Context
280   const SetupContext context = (SetupContext)ctx;
281   const CeedScalar theta0    = context->theta0;
282   const CeedScalar P0        = context->P0;
283   const CeedScalar cv        = context->cv;
284   const CeedScalar cp        = context->cp;
285   const CeedScalar *g        = context->g;
286   const CeedScalar Rd        = cp - cv;
287 
288   // Quadrature Point Loop
289   CeedPragmaSIMD
290   for (CeedInt i=0; i<Q; i++) {
291     CeedScalar q[5] = {0.};
292 
293     // Setup
294     // -- Coordinates
295     const CeedScalar x[3] = {X[0][i], X[1][i], X[2][i]};
296     const CeedScalar e_potential = -(g[0]*x[0] + g[1]*x[1] + g[2]*x[2]);
297 
298     // -- Density
299     const CeedScalar rho = P0 / (Rd*theta0);
300 
301     // Initial Conditions
302     q[0] = rho;
303     q[1] = 0.0;
304     q[2] = 0.0;
305     q[3] = 0.0;
306     q[4] = rho * (cv*theta0 + e_potential);
307 
308     for (CeedInt j=0; j<5; j++)
309       q0[j][i] = q[j];
310   } // End of Quadrature Point Loop
311   return 0;
312 }
313 
314 // *****************************************************************************
315 // This QFunction implements the following formulation of Navier-Stokes with
316 //   explicit time stepping method
317 //
318 // This is 3D compressible Navier-Stokes in conservation form with state
319 //   variables of density, momentum density, and total energy density.
320 //
321 // State Variables: q = ( rho, U1, U2, U3, E )
322 //   rho - Mass Density
323 //   Ui  - Momentum Density,      Ui = rho ui
324 //   E   - Total Energy Density,  E  = rho (cv T + (u u)/2 + g z)
325 //
326 // Navier-Stokes Equations:
327 //   drho/dt + div( U )                               = 0
328 //   dU/dt   + div( rho (u x u) + P I3 ) + rho g khat = div( Fu )
329 //   dE/dt   + div( (E + P) u )                       = div( Fe )
330 //
331 // Viscous Stress:
332 //   Fu = mu (grad( u ) + grad( u )^T + lambda div ( u ) I3)
333 //
334 // Thermal Stress:
335 //   Fe = u Fu + k grad( T )
336 // Equation of State
337 //   P = (gamma - 1) (E - rho (u u) / 2 - rho g z)
338 //
339 // Stabilization:
340 //   Tau = diag(TauC, TauM, TauM, TauM, TauE)
341 //     f1 = rho  sqrt(ui uj gij)
342 //     gij = dXi/dX * dXi/dX
343 //     TauC = Cc f1 / (8 gii)
344 //     TauM = min( 1 , 1 / f1 )
345 //     TauE = TauM / (Ce cv)
346 //
347 //  SU   = Galerkin + grad(v) . ( Ai^T * Tau * (Aj q,j) )
348 //
349 // Constants:
350 //   lambda = - 2 / 3,  From Stokes hypothesis
351 //   mu              ,  Dynamic viscosity
352 //   k               ,  Thermal conductivity
353 //   cv              ,  Specific heat, constant volume
354 //   cp              ,  Specific heat, constant pressure
355 //   g               ,  Gravity
356 //   gamma  = cp / cv,  Specific heat ratio
357 //
358 // We require the product of the inverse of the Jacobian (dXdx_j,k) and
359 // its transpose (dXdx_k,j) to properly compute integrals of the form:
360 // int( gradv gradu )
361 //
362 // *****************************************************************************
363 CEED_QFUNCTION(Newtonian)(void *ctx, CeedInt Q,
364                           const CeedScalar *const *in, CeedScalar *const *out) {
365   // *INDENT-OFF*
366   // Inputs
367   const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0],
368                    (*dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
369                    (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
370                    (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3];
371   // Outputs
372   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
373              (*dv)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1];
374   // *INDENT-ON*
375 
376   // Context
377   NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
378   const CeedScalar lambda = context->lambda;
379   const CeedScalar mu     = context->mu;
380   const CeedScalar k      = context->k;
381   const CeedScalar cv     = context->cv;
382   const CeedScalar cp     = context->cp;
383   const CeedScalar *g     = context->g;
384   const CeedScalar dt     = context->dt;
385   const CeedScalar gamma  = cp / cv;
386   const CeedScalar Rd     = cp - cv;
387 
388   CeedPragmaSIMD
389   // Quadrature Point Loop
390   for (CeedInt i=0; i<Q; i++) {
391     // *INDENT-OFF*
392     // Setup
393     // -- Interp in
394     const CeedScalar rho        =   q[0][i];
395     const CeedScalar u[3]       =  {q[1][i] / rho,
396                                     q[2][i] / rho,
397                                     q[3][i] / rho
398                                    };
399     const CeedScalar E          =   q[4][i];
400     // -- Grad in
401     const CeedScalar drho[3]    =  {dq[0][0][i],
402                                     dq[1][0][i],
403                                     dq[2][0][i]
404                                    };
405     const CeedScalar dU[3][3]   = {{dq[0][1][i],
406                                     dq[1][1][i],
407                                     dq[2][1][i]},
408                                    {dq[0][2][i],
409                                     dq[1][2][i],
410                                     dq[2][2][i]},
411                                    {dq[0][3][i],
412                                     dq[1][3][i],
413                                     dq[2][3][i]}
414                                   };
415     const CeedScalar dE[3]      =  {dq[0][4][i],
416                                     dq[1][4][i],
417                                     dq[2][4][i]
418                                    };
419     // -- Interp-to-Interp q_data
420     const CeedScalar wdetJ      =   q_data[0][i];
421     // -- Interp-to-Grad q_data
422     // ---- Inverse of change of coordinate matrix: X_i,j
423     // *INDENT-OFF*
424     const CeedScalar dXdx[3][3] = {{q_data[1][i],
425                                     q_data[2][i],
426                                     q_data[3][i]},
427                                    {q_data[4][i],
428                                     q_data[5][i],
429                                     q_data[6][i]},
430                                    {q_data[7][i],
431                                     q_data[8][i],
432                                     q_data[9][i]}
433                                   };
434     const CeedScalar x_i[3]       = {x[0][i], x[1][i], x[2][i]};
435     // *INDENT-ON*
436     // -- Grad-to-Grad q_data
437     // dU/dx
438     CeedScalar du[3][3] = {{0}};
439     CeedScalar drhodx[3] = {0};
440     CeedScalar dEdx[3] = {0};
441     CeedScalar dUdx[3][3] = {{0}};
442     CeedScalar dXdxdXdxT[3][3] = {{0}};
443     for (int j=0; j<3; j++) {
444       for (int k=0; k<3; k++) {
445         du[j][k] = (dU[j][k] - drho[k]*u[j]) / rho;
446         drhodx[j] += drho[k] * dXdx[k][j];
447         dEdx[j] += dE[k] * dXdx[k][j];
448         for (int l=0; l<3; l++) {
449           dUdx[j][k] += dU[j][l] * dXdx[l][k];
450           dXdxdXdxT[j][k] += dXdx[j][l]*dXdx[k][l];  //dXdx_j,k * dXdx_k,j
451         }
452       }
453     }
454     CeedScalar dudx[3][3] = {{0}};
455     for (int j=0; j<3; j++)
456       for (int k=0; k<3; k++)
457         for (int l=0; l<3; l++)
458           dudx[j][k] += du[j][l] * dXdx[l][k];
459     // -- grad_T
460     const CeedScalar grad_T[3]  = {(dEdx[0]/rho - E*drhodx[0]/(rho*rho) - /* *NOPAD* */
461                                     (u[0]*dudx[0][0] + u[1]*dudx[1][0] + u[2]*dudx[2][0]) + g[0])/cv,
462                                    (dEdx[1]/rho - E*drhodx[1]/(rho*rho) - /* *NOPAD* */
463                                     (u[0]*dudx[0][1] + u[1]*dudx[1][1] + u[2]*dudx[2][1]) + g[1])/cv,
464                                    (dEdx[2]/rho - E*drhodx[2]/(rho*rho) - /* *NOPAD* */
465                                     (u[0]*dudx[0][2] + u[1]*dudx[1][2] + u[2]*dudx[2][2]) + g[2])/cv
466                                   };
467 
468     // -- Fuvisc
469     // ---- Symmetric 3x3 matrix
470     const CeedScalar Fu[6]     =  {mu*(dudx[0][0] * (2 + lambda) + /* *NOPAD* */
471                                        lambda * (dudx[1][1] + dudx[2][2])),
472                                    mu*(dudx[0][1] + dudx[1][0]), /* *NOPAD* */
473                                    mu*(dudx[0][2] + dudx[2][0]), /* *NOPAD* */
474                                    mu*(dudx[1][1] * (2 + lambda) + /* *NOPAD* */
475                                        lambda * (dudx[0][0] + dudx[2][2])),
476                                    mu*(dudx[1][2] + dudx[2][1]), /* *NOPAD* */
477                                    mu*(dudx[2][2] * (2 + lambda) + /* *NOPAD* */
478                                        lambda * (dudx[0][0] + dudx[1][1]))
479                                   };
480     // -- Fevisc
481     const CeedScalar Fe[3]     =  {u[0]*Fu[0] + u[1]*Fu[1] + u[2]*Fu[2] + /* *NOPAD* */
482                                    k*grad_T[0], /* *NOPAD* */
483                                    u[0]*Fu[1] + u[1]*Fu[3] + u[2]*Fu[4] + /* *NOPAD* */
484                                    k*grad_T[1], /* *NOPAD* */
485                                    u[0]*Fu[2] + u[1]*Fu[4] + u[2]*Fu[5] + /* *NOPAD* */
486                                    k*grad_T[2] /* *NOPAD* */
487                                   };
488     // Pressure
489     const CeedScalar
490     E_kinetic   = 0.5 * rho * (u[0]*u[0] + u[1]*u[1] + u[2]*u[2]),
491     E_potential = -rho*(g[0]*x_i[0] + g[1]*x_i[1] + g[2]*x_i[2]),
492     E_internal  = E - E_kinetic - E_potential,
493     P           = E_internal * (gamma - 1.); // P = pressure
494 
495     // jacob_F_conv[3][5][5] = dF(convective)/dq at each direction
496     CeedScalar jacob_F_conv[3][5][5] = {{{0.}}};
497     computeFluxJacobian_NS(jacob_F_conv, rho, u, E, gamma, g, x_i);
498 
499     // dqdx collects drhodx, dUdx and dEdx in one vector
500     CeedScalar dqdx[5][3];
501     for (int j=0; j<3; j++) {
502       dqdx[0][j] = drhodx[j];
503       dqdx[4][j] = dEdx[j];
504       for (int k=0; k<3; k++)
505         dqdx[k+1][j] = dUdx[k][j];
506     }
507 
508     // strong_conv = dF/dq * dq/dx    (Strong convection)
509     CeedScalar strong_conv[5] = {0};
510     for (int j=0; j<3; j++)
511       for (int k=0; k<5; k++)
512         for (int l=0; l<5; l++)
513           strong_conv[k] += jacob_F_conv[j][k][l] * dqdx[l][j];
514 
515     // Body force
516     const CeedScalar body_force[5] = {0, rho *g[0], rho *g[1], rho *g[2], 0};
517 
518     // The Physics
519     // Zero dv so all future terms can safely sum into it
520     for (int j=0; j<5; j++)
521       for (int k=0; k<3; k++)
522         dv[k][j][i] = 0;
523 
524     // -- Density
525     // ---- u rho
526     for (int j=0; j<3; j++)
527       dv[j][0][i]  += wdetJ*(rho*u[0]*dXdx[j][0] + rho*u[1]*dXdx[j][1] +
528                              rho*u[2]*dXdx[j][2]);
529     // -- Momentum
530     // ---- rho (u x u) + P I3
531     for (int j=0; j<3; j++)
532       for (int k=0; k<3; k++)
533         dv[k][j+1][i]  += wdetJ*((rho*u[j]*u[0] + (j==0?P:0))*dXdx[k][0] +
534                                  (rho*u[j]*u[1] + (j==1?P:0))*dXdx[k][1] +
535                                  (rho*u[j]*u[2] + (j==2?P:0))*dXdx[k][2]);
536     // ---- Fuvisc
537     const CeedInt Fuviscidx[3][3] = {{0, 1, 2}, {1, 3, 4}, {2, 4, 5}}; // symmetric matrix indices
538     for (int j=0; j<3; j++)
539       for (int k=0; k<3; k++)
540         dv[k][j+1][i] -= wdetJ*(Fu[Fuviscidx[j][0]]*dXdx[k][0] +
541                                 Fu[Fuviscidx[j][1]]*dXdx[k][1] +
542                                 Fu[Fuviscidx[j][2]]*dXdx[k][2]);
543     // -- Total Energy Density
544     // ---- (E + P) u
545     for (int j=0; j<3; j++)
546       dv[j][4][i]  += wdetJ * (E + P) * (u[0]*dXdx[j][0] + u[1]*dXdx[j][1] +
547                                          u[2]*dXdx[j][2]);
548     // ---- Fevisc
549     for (int j=0; j<3; j++)
550       dv[j][4][i] -= wdetJ * (Fe[0]*dXdx[j][0] + Fe[1]*dXdx[j][1] +
551                               Fe[2]*dXdx[j][2]);
552     // Body Force
553     for (int j=0; j<5; j++)
554       v[j][i] = wdetJ * body_force[j];
555 
556     // Spatial Stabilization
557     // -- Not used in favor of diagonal tau. Kept for future testing
558     // const CeedScalar sound_speed = sqrt(gamma * P / rho);
559     // CeedScalar Tau_x[3] = {0.};
560     // Tau_spatial(Tau_x, dXdx, u, sound_speed, context->c_tau, mu);
561 
562     // -- Stabilization method: none, SU, or SUPG
563     CeedScalar stab[5][3] = {{0.}};
564     CeedScalar tau_strong_conv[5] = {0.}, tau_strong_conv_conservative[5] = {0};
565     CeedScalar Tau_d[3] = {0.};
566     switch (context->stabilization) {
567     case STAB_NONE:        // Galerkin
568       break;
569     case STAB_SU:        // SU
570       Tau_diagPrim(Tau_d, dXdx, u, cv, context, mu, dt, rho);
571       tau_strong_conv[0] = Tau_d[0] * strong_conv[0];
572       tau_strong_conv[1] = Tau_d[1] * strong_conv[1];
573       tau_strong_conv[2] = Tau_d[1] * strong_conv[2];
574       tau_strong_conv[3] = Tau_d[1] * strong_conv[3];
575       tau_strong_conv[4] = Tau_d[2] * strong_conv[4];
576       PrimitiveToConservative_fwd(rho, u, E, Rd, cv, tau_strong_conv,
577                                   tau_strong_conv_conservative);
578       for (int j=0; j<3; j++)
579         for (int k=0; k<5; k++)
580           for (int l=0; l<5; l++)
581             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l];
582 
583       for (int j=0; j<5; j++)
584         for (int k=0; k<3; k++)
585           dv[k][j][i] -= wdetJ*(stab[j][0] * dXdx[k][0] +
586                                 stab[j][1] * dXdx[k][1] +
587                                 stab[j][2] * dXdx[k][2]);
588       break;
589     case STAB_SUPG:        // SUPG is not implemented for explicit scheme
590       break;
591     }
592 
593   } // End Quadrature Point Loop
594 
595   // Return
596   return 0;
597 }
598 
599 // *****************************************************************************
600 // This QFunction implements the Navier-Stokes equations (mentioned above) with
601 //   implicit time stepping method
602 //
603 //  SU   = Galerkin + grad(v) . ( Ai^T * Tau * (Aj q,j) )
604 //  SUPG = Galerkin + grad(v) . ( Ai^T * Tau * (q_dot + Aj q,j - body force) )
605 //                                       (diffussive terms will be added later)
606 //
607 // *****************************************************************************
608 CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q,
609                                     const CeedScalar *const *in,
610                                     CeedScalar *const *out) {
611   // *INDENT-OFF*
612   // Inputs
613   const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0],
614                    (*dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
615                    (*q_dot)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
616                    (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3],
617                    (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4];
618   // Outputs
619   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
620              (*dv)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1];
621   // *INDENT-ON*
622   // Context
623   NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
624   const CeedScalar lambda = context->lambda;
625   const CeedScalar mu     = context->mu;
626   const CeedScalar k      = context->k;
627   const CeedScalar cv     = context->cv;
628   const CeedScalar cp     = context->cp;
629   const CeedScalar *g     = context->g;
630   const CeedScalar dt     = context->dt;
631   const CeedScalar gamma  = cp / cv;
632   const CeedScalar Rd     = cp-cv;
633 
634   CeedPragmaSIMD
635   // Quadrature Point Loop
636   for (CeedInt i=0; i<Q; i++) {
637     // Setup
638     // -- Interp in
639     const CeedScalar rho        =   q[0][i];
640     const CeedScalar u[3]       =  {q[1][i] / rho,
641                                     q[2][i] / rho,
642                                     q[3][i] / rho
643                                    };
644     const CeedScalar E          =   q[4][i];
645     // -- Grad in
646     const CeedScalar drho[3]    =  {dq[0][0][i],
647                                     dq[1][0][i],
648                                     dq[2][0][i]
649                                    };
650     // *INDENT-OFF*
651     const CeedScalar dU[3][3]   = {{dq[0][1][i],
652                                     dq[1][1][i],
653                                     dq[2][1][i]},
654                                    {dq[0][2][i],
655                                     dq[1][2][i],
656                                     dq[2][2][i]},
657                                    {dq[0][3][i],
658                                     dq[1][3][i],
659                                     dq[2][3][i]}
660                                   };
661     // *INDENT-ON*
662     const CeedScalar dE[3]      =  {dq[0][4][i],
663                                     dq[1][4][i],
664                                     dq[2][4][i]
665                                    };
666     // -- Interp-to-Interp q_data
667     const CeedScalar wdetJ      =   q_data[0][i];
668     // -- Interp-to-Grad q_data
669     // ---- Inverse of change of coordinate matrix: X_i,j
670     // *INDENT-OFF*
671     const CeedScalar dXdx[3][3] = {{q_data[1][i],
672                                     q_data[2][i],
673                                     q_data[3][i]},
674                                    {q_data[4][i],
675                                     q_data[5][i],
676                                     q_data[6][i]},
677                                    {q_data[7][i],
678                                     q_data[8][i],
679                                     q_data[9][i]}
680                                   };
681     const CeedScalar x_i[3]     = {x[0][i], x[1][i], x[2][i]};
682     // *INDENT-ON*
683     // -- Grad-to-Grad q_data
684     // dU/dx
685     CeedScalar du[3][3] = {{0}};
686     CeedScalar drhodx[3] = {0};
687     CeedScalar dEdx[3] = {0};
688     CeedScalar dUdx[3][3] = {{0}};
689     CeedScalar dXdxdXdxT[3][3] = {{0}};
690     for (int j=0; j<3; j++) {
691       for (int k=0; k<3; k++) {
692         du[j][k] = (dU[j][k] - drho[k]*u[j]) / rho;
693         drhodx[j] += drho[k] * dXdx[k][j];
694         dEdx[j] += dE[k] * dXdx[k][j];
695         for (int l=0; l<3; l++) {
696           dUdx[j][k] += dU[j][l] * dXdx[l][k];
697           dXdxdXdxT[j][k] += dXdx[j][l]*dXdx[k][l];  //dXdx_j,k * dXdx_k,j
698         }
699       }
700     }
701     CeedScalar dudx[3][3] = {{0}};
702     for (int j=0; j<3; j++)
703       for (int k=0; k<3; k++)
704         for (int l=0; l<3; l++)
705           dudx[j][k] += du[j][l] * dXdx[l][k];
706     // -- grad_T
707     const CeedScalar grad_T[3]  = {(dEdx[0]/rho - E*drhodx[0]/(rho*rho) - /* *NOPAD* */
708                                     (u[0]*dudx[0][0] + u[1]*dudx[1][0] + u[2]*dudx[2][0]) + g[0])/cv,
709                                    (dEdx[1]/rho - E*drhodx[1]/(rho*rho) - /* *NOPAD* */
710                                     (u[0]*dudx[0][1] + u[1]*dudx[1][1] + u[2]*dudx[2][1]) + g[1])/cv,
711                                    (dEdx[2]/rho - E*drhodx[2]/(rho*rho) - /* *NOPAD* */
712                                     (u[0]*dudx[0][2] + u[1]*dudx[1][2] + u[2]*dudx[2][2]) + g[2])/cv
713                                   };
714     // -- Fuvisc
715     // ---- Symmetric 3x3 matrix
716     const CeedScalar Fu[6]     =  {mu*(dudx[0][0] * (2 + lambda) + /* *NOPAD* */
717                                        lambda * (dudx[1][1] + dudx[2][2])),
718                                    mu*(dudx[0][1] + dudx[1][0]), /* *NOPAD* */
719                                    mu*(dudx[0][2] + dudx[2][0]), /* *NOPAD* */
720                                    mu*(dudx[1][1] * (2 + lambda) + /* *NOPAD* */
721                                        lambda * (dudx[0][0] + dudx[2][2])),
722                                    mu*(dudx[1][2] + dudx[2][1]), /* *NOPAD* */
723                                    mu*(dudx[2][2] * (2 + lambda) + /* *NOPAD* */
724                                        lambda * (dudx[0][0] + dudx[1][1]))
725                                   };
726     // -- Fevisc
727     const CeedScalar Fe[3]     =  {u[0]*Fu[0] + u[1]*Fu[1] + u[2]*Fu[2] + /* *NOPAD* */
728                                    k*grad_T[0], /* *NOPAD* */
729                                    u[0]*Fu[1] + u[1]*Fu[3] + u[2]*Fu[4] + /* *NOPAD* */
730                                    k*grad_T[1], /* *NOPAD* */
731                                    u[0]*Fu[2] + u[1]*Fu[4] + u[2]*Fu[5] + /* *NOPAD* */
732                                    k*grad_T[2] /* *NOPAD* */
733                                   };
734     // Pressure
735     const CeedScalar
736     E_kinetic   = 0.5 * rho * (u[0]*u[0] + u[1]*u[1] + u[2]*u[2]),
737     E_potential = -rho*(g[0]*x_i[0] + g[1]*x_i[1] + g[2]*x_i[2]),
738     E_internal  = E - E_kinetic - E_potential,
739     P           = E_internal * (gamma - 1.); // P = pressure
740 
741     // jacob_F_conv[3][5][5] = dF(convective)/dq at each direction
742     CeedScalar jacob_F_conv[3][5][5] = {{{0.}}};
743     computeFluxJacobian_NS(jacob_F_conv, rho, u, E, gamma, g, x_i);
744 
745     // dqdx collects drhodx, dUdx and dEdx in one vector
746     CeedScalar dqdx[5][3];
747     for (int j=0; j<3; j++) {
748       dqdx[0][j] = drhodx[j];
749       dqdx[4][j] = dEdx[j];
750       for (int k=0; k<3; k++)
751         dqdx[k+1][j] = dUdx[k][j];
752     }
753     // strong_conv = dF/dq * dq/dx    (Strong convection)
754     CeedScalar strong_conv[5] = {0};
755     for (int j=0; j<3; j++)
756       for (int k=0; k<5; k++)
757         for (int l=0; l<5; l++)
758           strong_conv[k] += jacob_F_conv[j][k][l] * dqdx[l][j];
759 
760     // Body force
761     const CeedScalar body_force[5] = {0, rho *g[0], rho *g[1], rho *g[2], 0};
762 
763     // Strong residual
764     CeedScalar strong_res[5];
765     for (int j=0; j<5; j++)
766       strong_res[j] = q_dot[j][i] + strong_conv[j] - body_force[j];
767 
768     // The Physics
769     //-----mass matrix
770     for (int j=0; j<5; j++)
771       v[j][i] = wdetJ*q_dot[j][i];
772 
773     // Zero dv so all future terms can safely sum into it
774     for (int j=0; j<5; j++)
775       for (int k=0; k<3; k++)
776         dv[k][j][i] = 0;
777 
778     // -- Density
779     // ---- u rho
780     for (int j=0; j<3; j++)
781       dv[j][0][i]  -= wdetJ*(rho*u[0]*dXdx[j][0] + rho*u[1]*dXdx[j][1] +
782                              rho*u[2]*dXdx[j][2]);
783     // -- Momentum
784     // ---- rho (u x u) + P I3
785     for (int j=0; j<3; j++)
786       for (int k=0; k<3; k++)
787         dv[k][j+1][i]  -= wdetJ*((rho*u[j]*u[0] + (j==0?P:0))*dXdx[k][0] +
788                                  (rho*u[j]*u[1] + (j==1?P:0))*dXdx[k][1] +
789                                  (rho*u[j]*u[2] + (j==2?P:0))*dXdx[k][2]);
790     // ---- Fuvisc
791     const CeedInt Fuviscidx[3][3] = {{0, 1, 2}, {1, 3, 4}, {2, 4, 5}}; // symmetric matrix indices
792     for (int j=0; j<3; j++)
793       for (int k=0; k<3; k++)
794         dv[k][j+1][i] += wdetJ*(Fu[Fuviscidx[j][0]]*dXdx[k][0] +
795                                 Fu[Fuviscidx[j][1]]*dXdx[k][1] +
796                                 Fu[Fuviscidx[j][2]]*dXdx[k][2]);
797     // -- Total Energy Density
798     // ---- (E + P) u
799     for (int j=0; j<3; j++)
800       dv[j][4][i]  -= wdetJ * (E + P) * (u[0]*dXdx[j][0] + u[1]*dXdx[j][1] +
801                                          u[2]*dXdx[j][2]);
802     // ---- Fevisc
803     for (int j=0; j<3; j++)
804       dv[j][4][i] += wdetJ * (Fe[0]*dXdx[j][0] + Fe[1]*dXdx[j][1] +
805                               Fe[2]*dXdx[j][2]);
806     // Body Force
807     for (int j=0; j<5; j++)
808       v[j][i] -= wdetJ*body_force[j];
809 
810     // Spatial Stabilization
811     // -- Not used in favor of diagonal tau. Kept for future testing
812     // const CeedScalar sound_speed = sqrt(gamma * P / rho);
813     // CeedScalar Tau_x[3] = {0.};
814     // Tau_spatial(Tau_x, dXdx, u, sound_speed, c_tau, mu);
815 
816     // -- Stabilization method: none, SU, or SUPG
817     CeedScalar stab[5][3] = {{0.}};
818     CeedScalar tau_strong_res[5] = {0.}, tau_strong_res_conservative[5] = {0};
819     CeedScalar tau_strong_conv[5] = {0.}, tau_strong_conv_conservative[5] = {0};
820     CeedScalar Tau_d[3] = {0.};
821     switch (context->stabilization) {
822     case STAB_NONE:        // Galerkin
823       break;
824     case STAB_SU:        // SU
825       Tau_diagPrim(Tau_d, dXdx, u, cv, context, mu, dt, rho);
826       tau_strong_conv[0] = Tau_d[0] * strong_conv[0];
827       tau_strong_conv[1] = Tau_d[1] * strong_conv[1];
828       tau_strong_conv[2] = Tau_d[1] * strong_conv[2];
829       tau_strong_conv[3] = Tau_d[1] * strong_conv[3];
830       tau_strong_conv[4] = Tau_d[2] * strong_conv[4];
831       PrimitiveToConservative_fwd(rho, u, E, Rd, cv, tau_strong_conv,
832                                   tau_strong_conv_conservative);
833       for (int j=0; j<3; j++)
834         for (int k=0; k<5; k++)
835           for (int l=0; l<5; l++)
836             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l];
837 
838       for (int j=0; j<5; j++)
839         for (int k=0; k<3; k++)
840           dv[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
841                                 stab[j][1] * dXdx[k][1] +
842                                 stab[j][2] * dXdx[k][2]);
843       break;
844     case STAB_SUPG:        // SUPG
845       Tau_diagPrim(Tau_d, dXdx, u, cv, context, mu, dt, rho);
846       tau_strong_res[0] = Tau_d[0] * strong_res[0];
847       tau_strong_res[1] = Tau_d[1] * strong_res[1];
848       tau_strong_res[2] = Tau_d[1] * strong_res[2];
849       tau_strong_res[3] = Tau_d[1] * strong_res[3];
850       tau_strong_res[4] = Tau_d[2] * strong_res[4];
851 // Alternate route (useful later with primitive variable code)
852 // this function was verified against PHASTA for as IC that was as close as possible
853 //    computeFluxJacobian_NSp(jacob_F_conv_p, rho, u, E, Rd, cv);
854 // it has also been verified to compute a correct through the following
855 //   stab[k][j] += jacob_F_conv_p[j][k][l] * tau_strong_res[l] // flux Jacobian wrt primitive
856 // applied in the triple loop below
857 //  However, it is more flops than using the existing Jacobian wrt q after q_{,Y} viz
858       PrimitiveToConservative_fwd(rho, u, E, Rd, cv, tau_strong_res,
859                                   tau_strong_res_conservative);
860       for (int j=0; j<3; j++)
861         for (int k=0; k<5; k++)
862           for (int l=0; l<5; l++)
863             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_res_conservative[l];
864 
865       for (int j=0; j<5; j++)
866         for (int k=0; k<3; k++)
867           dv[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
868                                 stab[j][1] * dXdx[k][1] +
869                                 stab[j][2] * dXdx[k][2]);
870       break;
871     }
872 
873   } // End Quadrature Point Loop
874 
875   // Return
876   return 0;
877 }
878 // *****************************************************************************
879 #endif // newtonian_h
880