xref: /libCEED/examples/fluids/qfunctions/newtonian.h (revision 841e4c7362a2acf3a6f116f4961b1eb52fa410fc)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 /// @file
9 /// Operator for Navier-Stokes example using PETSc
10 
11 
12 #ifndef newtonian_h
13 #define newtonian_h
14 
15 #include <math.h>
16 #include <ceed.h>
17 #include "newtonian_types.h"
18 
19 #ifndef M_PI
20 #define M_PI    3.14159265358979323846
21 #endif
22 
23 #ifndef setup_context_struct
24 #define setup_context_struct
25 typedef struct SetupContext_ *SetupContext;
26 struct SetupContext_ {
27   CeedScalar theta0;
28   CeedScalar thetaC;
29   CeedScalar P0;
30   CeedScalar N;
31   CeedScalar cv;
32   CeedScalar cp;
33   CeedScalar g[3];
34   CeedScalar rc;
35   CeedScalar lx;
36   CeedScalar ly;
37   CeedScalar lz;
38   CeedScalar center[3];
39   CeedScalar dc_axis[3];
40   CeedScalar wind[3];
41   CeedScalar time;
42   int wind_type;              // See WindType: 0=ROTATION, 1=TRANSLATION
43   int bubble_type;            // See BubbleType: 0=SPHERE, 1=CYLINDER
44   int bubble_continuity_type; // See BubbleContinuityType: 0=SMOOTH, 1=BACK_SHARP 2=THICK
45 };
46 #endif
47 
48 // *****************************************************************************
49 // Helper function for computing flux Jacobian
50 // *****************************************************************************
51 CEED_QFUNCTION_HELPER void computeFluxJacobian_NS(CeedScalar dF[3][5][5],
52     const CeedScalar rho, const CeedScalar u[3], const CeedScalar E,
53     const CeedScalar gamma, const CeedScalar g[3], const CeedScalar x[3]) {
54   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; // Velocity square
55   CeedScalar e_potential = -(g[0]*x[0] + g[1]*x[1] + g[2]*x[2]);
56   for (CeedInt i=0; i<3; i++) { // Jacobian matrices for 3 directions
57     for (CeedInt j=0; j<3; j++) { // Rows of each Jacobian matrix
58       dF[i][j+1][0] = ((i==j) ? ((gamma-1.)*(u_sq/2. - e_potential)) : 0.) -
59                       u[i]*u[j];
60       for (CeedInt k=0; k<3; k++) { // Columns of each Jacobian matrix
61         dF[i][0][k+1]   = ((i==k) ? 1. : 0.);
62         dF[i][j+1][k+1] = ((j==k) ? u[i] : 0.) +
63                           ((i==k) ? u[j] : 0.) -
64                           ((i==j) ? u[k] : 0.) * (gamma-1.);
65         dF[i][4][k+1]   = ((i==k) ? (E*gamma/rho - (gamma-1.)*u_sq/2.) : 0.) -
66                           (gamma-1.)*u[i]*u[k];
67       }
68       dF[i][j+1][4] = ((i==j) ? (gamma-1.) : 0.);
69     }
70     dF[i][4][0] = u[i] * ((gamma-1.)*u_sq - E*gamma/rho);
71     dF[i][4][4] = u[i] * gamma;
72   }
73 }
74 
75 // *****************************************************************************
76 // Helper function for computing flux Jacobian of Primitive variables
77 // *****************************************************************************
78 CEED_QFUNCTION_HELPER void computeFluxJacobian_NSp(CeedScalar dF[3][5][5],
79     const CeedScalar rho, const CeedScalar u[3], const CeedScalar E,
80     const CeedScalar Rd, const CeedScalar cv) {
81   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; // Velocity square
82   // TODO Add in gravity's contribution
83 
84   CeedScalar T    = ( E / rho - u_sq / 2. ) / cv;
85   CeedScalar drdT = -rho / T;
86   CeedScalar drdP = 1. / ( Rd * T);
87   CeedScalar etot =  E / rho ;
88   CeedScalar e2p  = drdP * etot + 1. ;
89   CeedScalar e3p  = ( E  + rho * Rd * T );
90   CeedScalar e4p  = drdT * etot + rho * cv ;
91 
92   for (CeedInt i=0; i<3; i++) { // Jacobian matrices for 3 directions
93     for (CeedInt j=0; j<3; j++) { // j counts F^{m_j}
94 //        [row][col] of A_i
95       dF[i][j+1][0] = drdP * u[i] * u[j] + ((i==j) ? 1. : 0.); // F^{{m_j} wrt p
96       for (CeedInt k=0; k<3; k++) { // k counts the wrt vel_k
97         dF[i][0][k+1]   =  ((i==k) ? rho  : 0.);   // F^c wrt u_k
98         dF[i][j+1][k+1] = (((j==k) ? u[i] : 0.) +  // F^m_j wrt u_k
99                            ((i==k) ? u[j] : 0.) ) * rho;
100         dF[i][4][k+1]   = rho * u[i] * u[k]
101                           + ((i==k) ? e3p  : 0.) ; // F^e wrt u_k
102       }
103       dF[i][j+1][4] = drdT * u[i] * u[j]; // F^{m_j} wrt T
104     }
105     dF[i][4][0] = u[i] * e2p; // F^e wrt p
106     dF[i][4][4] = u[i] * e4p; // F^e wrt T
107     dF[i][0][0] = u[i] * drdP; // F^c wrt p
108     dF[i][0][4] = u[i] * drdT; // F^c wrt T
109   }
110 }
111 
112 CEED_QFUNCTION_HELPER void PrimitiveToConservative_fwd(const CeedScalar rho,
113     const CeedScalar u[3], const CeedScalar E, const CeedScalar Rd,
114     const CeedScalar cv, const CeedScalar dY[5], CeedScalar dU[5]) {
115   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2];
116   CeedScalar T    = ( E / rho - u_sq / 2. ) / cv;
117   CeedScalar drdT = -rho / T;
118   CeedScalar drdP = 1. / ( Rd * T);
119   dU[0] = drdP * dY[0] + drdT * dY[4];
120   CeedScalar de_kinetic = 0;
121   for (int i=0; i<3; i++) {
122     dU[1+i] = dU[0] * u[i] + rho * dY[1+i];
123     de_kinetic += u[i] * dY[1+i];
124   }
125   dU[4] = rho * cv * dY[4] + dU[0] * cv * T // internal energy: rho * e
126           + rho * de_kinetic + .5 * dU[0] * u_sq; // kinetic energy: .5 * rho * |u|^2
127 }
128 
129 // *****************************************************************************
130 // Helper function for computing Tau elements (stabilization constant)
131 //   Model from:
132 //     PHASTA
133 //
134 //   Tau[i] = itau=0 which is diagonal-Shakib (3 values still but not spatial)
135 //
136 // Where NOT UPDATED YET
137 // *****************************************************************************
138 CEED_QFUNCTION_HELPER void Tau_diagPrim(CeedScalar Tau_d[3],
139                                         const CeedScalar dXdx[3][3], const CeedScalar u[3],
140                                         const CeedScalar cv, const NewtonianIdealGasContext newt_ctx,
141                                         const CeedScalar mu, const CeedScalar dt,
142                                         const CeedScalar rho) {
143   // Context
144   const CeedScalar Ctau_t = newt_ctx->Ctau_t;
145   const CeedScalar Ctau_v = newt_ctx->Ctau_v;
146   const CeedScalar Ctau_C = newt_ctx->Ctau_C;
147   const CeedScalar Ctau_M = newt_ctx->Ctau_M;
148   const CeedScalar Ctau_E = newt_ctx->Ctau_E;
149   CeedScalar gijd[6];
150   CeedScalar tau;
151   CeedScalar dts;
152   CeedScalar fact;
153 
154   //*INDENT-OFF*
155   gijd[0] =   dXdx[0][0] * dXdx[0][0]
156             + dXdx[1][0] * dXdx[1][0]
157             + dXdx[2][0] * dXdx[2][0];
158 
159   gijd[1] =   dXdx[0][0] * dXdx[0][1]
160             + dXdx[1][0] * dXdx[1][1]
161             + dXdx[2][0] * dXdx[2][1];
162 
163   gijd[2] =   dXdx[0][1] * dXdx[0][1]
164             + dXdx[1][1] * dXdx[1][1]
165             + dXdx[2][1] * dXdx[2][1];
166 
167   gijd[3] =   dXdx[0][0] * dXdx[0][2]
168             + dXdx[1][0] * dXdx[1][2]
169             + dXdx[2][0] * dXdx[2][2];
170 
171   gijd[4] =   dXdx[0][1] * dXdx[0][2]
172             + dXdx[1][1] * dXdx[1][2]
173             + dXdx[2][1] * dXdx[2][2];
174 
175   gijd[5] =   dXdx[0][2] * dXdx[0][2]
176             + dXdx[1][2] * dXdx[1][2]
177             + dXdx[2][2] * dXdx[2][2];
178   //*INDENT-ON*
179 
180   dts = Ctau_t / dt ;
181 
182   tau = rho*rho*((4. * dts * dts)
183                  + u[0] * ( u[0] * gijd[0] + 2. * ( u[1] * gijd[1] + u[2] * gijd[3]))
184                  + u[1] * ( u[1] * gijd[2] + 2. *   u[2] * gijd[4])
185                  + u[2] *   u[2] * gijd[5])
186         + Ctau_v* mu * mu *
187         (gijd[0]*gijd[0] + gijd[2]*gijd[2] + gijd[5]*gijd[5] +
188          + 2. * (gijd[1]*gijd[1] + gijd[3]*gijd[3] + gijd[4]*gijd[4]));
189 
190   fact=sqrt(tau);
191 
192   Tau_d[0] = Ctau_C * fact / (rho*(gijd[0] + gijd[2] + gijd[5]))*0.125;
193 
194   Tau_d[1] = Ctau_M / fact;
195   Tau_d[2] = Ctau_E / ( fact * cv );
196 
197 // consider putting back the way I initially had it  Ctau_E * Tau_d[1] /cv
198 //  to avoid a division if the compiler is smart enough to see that cv IS
199 // a constant that it could invert once for all elements
200 // but in that case energy tau is scaled by the product of Ctau_E * Ctau_M
201 // OR we could absorb cv into Ctau_E but this puts more burden on user to
202 // know how to change constants with a change of fluid or units.  Same for
203 // Ctau_v * mu * mu IF AND ONLY IF we don't add viscosity law =f(T)
204 }
205 
206 // *****************************************************************************
207 // Helper function for computing Tau elements (stabilization constant)
208 //   Model from:
209 //     Stabilized Methods for Compressible Flows, Hughes et al 2010
210 //
211 //   Spatial criterion #2 - Tau is a 3x3 diagonal matrix
212 //   Tau[i] = c_tau h[i] Xi(Pe) / rho(A[i]) (no sum)
213 //
214 // Where
215 //   c_tau     = stabilization constant (0.5 is reported as "optimal")
216 //   h[i]      = 2 length(dxdX[i])
217 //   Pe        = Peclet number ( Pe = sqrt(u u) / dot(dXdx,u) diffusivity )
218 //   Xi(Pe)    = coth Pe - 1. / Pe (1. at large local Peclet number )
219 //   rho(A[i]) = spectral radius of the convective flux Jacobian i,
220 //               wave speed in direction i
221 // *****************************************************************************
222 CEED_QFUNCTION_HELPER void Tau_spatial(CeedScalar Tau_x[3],
223                                        const CeedScalar dXdx[3][3], const CeedScalar u[3],
224                                        /* const CeedScalar sound_speed, const CeedScalar c_tau) { */
225                                        const CeedScalar sound_speed, const CeedScalar c_tau,
226                                        const CeedScalar viscosity) {
227   const CeedScalar mag_u_visc = sqrt(u[0]*u[0] +u[1]*u[1] +u[2]*u[2]) /
228                                 (2*viscosity);
229   for (int i=0; i<3; i++) {
230     // length of element in direction i
231     CeedScalar h = 2 / sqrt(dXdx[0][i]*dXdx[0][i] + dXdx[1][i]*dXdx[1][i] +
232                             dXdx[2][i]*dXdx[2][i]);
233     CeedScalar Pe = mag_u_visc*h;
234     CeedScalar Xi = 1/tanh(Pe) - 1/Pe;
235     // fastest wave in direction i
236     CeedScalar fastest_wave = fabs(u[i]) + sound_speed;
237     Tau_x[i] = c_tau * h * Xi / fastest_wave;
238   }
239 }
240 
241 // *****************************************************************************
242 // This QFunction sets a "still" initial condition for generic Newtonian IG problems
243 // *****************************************************************************
244 CEED_QFUNCTION(ICsNewtonianIG)(void *ctx, CeedInt Q,
245                                const CeedScalar *const *in, CeedScalar *const *out) {
246   // Inputs
247   const CeedScalar (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0];
248 
249   // Outputs
250   CeedScalar (*q0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
251 
252   // Context
253   const SetupContext context = (SetupContext)ctx;
254   const CeedScalar theta0    = context->theta0;
255   const CeedScalar P0        = context->P0;
256   const CeedScalar cv        = context->cv;
257   const CeedScalar cp        = context->cp;
258   const CeedScalar *g        = context->g;
259   const CeedScalar Rd        = cp - cv;
260 
261   // Quadrature Point Loop
262   CeedPragmaSIMD
263   for (CeedInt i=0; i<Q; i++) {
264     CeedScalar q[5] = {0.};
265 
266     // Setup
267     // -- Coordinates
268     const CeedScalar x[3] = {X[0][i], X[1][i], X[2][i]};
269     const CeedScalar e_potential = -(g[0]*x[0] + g[1]*x[1] + g[2]*x[2]);
270 
271     // -- Density
272     const CeedScalar rho = P0 / (Rd*theta0);
273 
274     // Initial Conditions
275     q[0] = rho;
276     q[1] = 0.0;
277     q[2] = 0.0;
278     q[3] = 0.0;
279     q[4] = rho * (cv*theta0 + e_potential);
280 
281     for (CeedInt j=0; j<5; j++)
282       q0[j][i] = q[j];
283   } // End of Quadrature Point Loop
284   return 0;
285 }
286 
287 // *****************************************************************************
288 // This QFunction implements the following formulation of Navier-Stokes with
289 //   explicit time stepping method
290 //
291 // This is 3D compressible Navier-Stokes in conservation form with state
292 //   variables of density, momentum density, and total energy density.
293 //
294 // State Variables: q = ( rho, U1, U2, U3, E )
295 //   rho - Mass Density
296 //   Ui  - Momentum Density,      Ui = rho ui
297 //   E   - Total Energy Density,  E  = rho (cv T + (u u)/2 + g z)
298 //
299 // Navier-Stokes Equations:
300 //   drho/dt + div( U )                               = 0
301 //   dU/dt   + div( rho (u x u) + P I3 ) + rho g khat = div( Fu )
302 //   dE/dt   + div( (E + P) u )                       = div( Fe )
303 //
304 // Viscous Stress:
305 //   Fu = mu (grad( u ) + grad( u )^T + lambda div ( u ) I3)
306 //
307 // Thermal Stress:
308 //   Fe = u Fu + k grad( T )
309 // Equation of State
310 //   P = (gamma - 1) (E - rho (u u) / 2 - rho g z)
311 //
312 // Stabilization:
313 //   Tau = diag(TauC, TauM, TauM, TauM, TauE)
314 //     f1 = rho  sqrt(ui uj gij)
315 //     gij = dXi/dX * dXi/dX
316 //     TauC = Cc f1 / (8 gii)
317 //     TauM = min( 1 , 1 / f1 )
318 //     TauE = TauM / (Ce cv)
319 //
320 //  SU   = Galerkin + grad(v) . ( Ai^T * Tau * (Aj q,j) )
321 //
322 // Constants:
323 //   lambda = - 2 / 3,  From Stokes hypothesis
324 //   mu              ,  Dynamic viscosity
325 //   k               ,  Thermal conductivity
326 //   cv              ,  Specific heat, constant volume
327 //   cp              ,  Specific heat, constant pressure
328 //   g               ,  Gravity
329 //   gamma  = cp / cv,  Specific heat ratio
330 //
331 // We require the product of the inverse of the Jacobian (dXdx_j,k) and
332 // its transpose (dXdx_k,j) to properly compute integrals of the form:
333 // int( gradv gradu )
334 //
335 // *****************************************************************************
336 CEED_QFUNCTION(Newtonian)(void *ctx, CeedInt Q,
337                           const CeedScalar *const *in, CeedScalar *const *out) {
338   // *INDENT-OFF*
339   // Inputs
340   const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0],
341                    (*dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
342                    (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
343                    (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3];
344   // Outputs
345   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
346              (*dv)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1];
347   // *INDENT-ON*
348 
349   // Context
350   NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
351   const CeedScalar lambda = context->lambda;
352   const CeedScalar mu     = context->mu;
353   const CeedScalar k      = context->k;
354   const CeedScalar cv     = context->cv;
355   const CeedScalar cp     = context->cp;
356   const CeedScalar *g     = context->g;
357   const CeedScalar dt     = context->dt;
358   const CeedScalar gamma  = cp / cv;
359   const CeedScalar Rd     = cp - cv;
360 
361   CeedPragmaSIMD
362   // Quadrature Point Loop
363   for (CeedInt i=0; i<Q; i++) {
364     // *INDENT-OFF*
365     // Setup
366     // -- Interp in
367     const CeedScalar rho        =   q[0][i];
368     const CeedScalar u[3]       =  {q[1][i] / rho,
369                                     q[2][i] / rho,
370                                     q[3][i] / rho
371                                    };
372     const CeedScalar E          =   q[4][i];
373     // -- Grad in
374     const CeedScalar drho[3]    =  {dq[0][0][i],
375                                     dq[1][0][i],
376                                     dq[2][0][i]
377                                    };
378     const CeedScalar dU[3][3]   = {{dq[0][1][i],
379                                     dq[1][1][i],
380                                     dq[2][1][i]},
381                                    {dq[0][2][i],
382                                     dq[1][2][i],
383                                     dq[2][2][i]},
384                                    {dq[0][3][i],
385                                     dq[1][3][i],
386                                     dq[2][3][i]}
387                                   };
388     const CeedScalar dE[3]      =  {dq[0][4][i],
389                                     dq[1][4][i],
390                                     dq[2][4][i]
391                                    };
392     // -- Interp-to-Interp q_data
393     const CeedScalar wdetJ      =   q_data[0][i];
394     // -- Interp-to-Grad q_data
395     // ---- Inverse of change of coordinate matrix: X_i,j
396     // *INDENT-OFF*
397     const CeedScalar dXdx[3][3] = {{q_data[1][i],
398                                     q_data[2][i],
399                                     q_data[3][i]},
400                                    {q_data[4][i],
401                                     q_data[5][i],
402                                     q_data[6][i]},
403                                    {q_data[7][i],
404                                     q_data[8][i],
405                                     q_data[9][i]}
406                                   };
407     const CeedScalar x_i[3]       = {x[0][i], x[1][i], x[2][i]};
408     // *INDENT-ON*
409     // -- Grad-to-Grad q_data
410     // dU/dx
411     CeedScalar du[3][3] = {{0}};
412     CeedScalar drhodx[3] = {0};
413     CeedScalar dEdx[3] = {0};
414     CeedScalar dUdx[3][3] = {{0}};
415     CeedScalar dXdxdXdxT[3][3] = {{0}};
416     for (int j=0; j<3; j++) {
417       for (int k=0; k<3; k++) {
418         du[j][k] = (dU[j][k] - drho[k]*u[j]) / rho;
419         drhodx[j] += drho[k] * dXdx[k][j];
420         dEdx[j] += dE[k] * dXdx[k][j];
421         for (int l=0; l<3; l++) {
422           dUdx[j][k] += dU[j][l] * dXdx[l][k];
423           dXdxdXdxT[j][k] += dXdx[j][l]*dXdx[k][l];  //dXdx_j,k * dXdx_k,j
424         }
425       }
426     }
427     CeedScalar dudx[3][3] = {{0}};
428     for (int j=0; j<3; j++)
429       for (int k=0; k<3; k++)
430         for (int l=0; l<3; l++)
431           dudx[j][k] += du[j][l] * dXdx[l][k];
432     // -- grad_T
433     const CeedScalar grad_T[3]  = {(dEdx[0]/rho - E*drhodx[0]/(rho*rho) - /* *NOPAD* */
434                                     (u[0]*dudx[0][0] + u[1]*dudx[1][0] + u[2]*dudx[2][0]) + g[0])/cv,
435                                    (dEdx[1]/rho - E*drhodx[1]/(rho*rho) - /* *NOPAD* */
436                                     (u[0]*dudx[0][1] + u[1]*dudx[1][1] + u[2]*dudx[2][1]) + g[1])/cv,
437                                    (dEdx[2]/rho - E*drhodx[2]/(rho*rho) - /* *NOPAD* */
438                                     (u[0]*dudx[0][2] + u[1]*dudx[1][2] + u[2]*dudx[2][2]) + g[2])/cv
439                                   };
440 
441     // -- Fuvisc
442     // ---- Symmetric 3x3 matrix
443     const CeedScalar Fu[6]     =  {mu*(dudx[0][0] * (2 + lambda) + /* *NOPAD* */
444                                        lambda * (dudx[1][1] + dudx[2][2])),
445                                    mu*(dudx[0][1] + dudx[1][0]), /* *NOPAD* */
446                                    mu*(dudx[0][2] + dudx[2][0]), /* *NOPAD* */
447                                    mu*(dudx[1][1] * (2 + lambda) + /* *NOPAD* */
448                                        lambda * (dudx[0][0] + dudx[2][2])),
449                                    mu*(dudx[1][2] + dudx[2][1]), /* *NOPAD* */
450                                    mu*(dudx[2][2] * (2 + lambda) + /* *NOPAD* */
451                                        lambda * (dudx[0][0] + dudx[1][1]))
452                                   };
453     // -- Fevisc
454     const CeedScalar Fe[3]     =  {u[0]*Fu[0] + u[1]*Fu[1] + u[2]*Fu[2] + /* *NOPAD* */
455                                    k*grad_T[0], /* *NOPAD* */
456                                    u[0]*Fu[1] + u[1]*Fu[3] + u[2]*Fu[4] + /* *NOPAD* */
457                                    k*grad_T[1], /* *NOPAD* */
458                                    u[0]*Fu[2] + u[1]*Fu[4] + u[2]*Fu[5] + /* *NOPAD* */
459                                    k*grad_T[2] /* *NOPAD* */
460                                   };
461     // Pressure
462     const CeedScalar
463     E_kinetic   = 0.5 * rho * (u[0]*u[0] + u[1]*u[1] + u[2]*u[2]),
464     E_potential = -rho*(g[0]*x_i[0] + g[1]*x_i[1] + g[2]*x_i[2]),
465     E_internal  = E - E_kinetic - E_potential,
466     P           = E_internal * (gamma - 1.); // P = pressure
467 
468     // jacob_F_conv[3][5][5] = dF(convective)/dq at each direction
469     CeedScalar jacob_F_conv[3][5][5] = {{{0.}}};
470     computeFluxJacobian_NS(jacob_F_conv, rho, u, E, gamma, g, x_i);
471 
472     // dqdx collects drhodx, dUdx and dEdx in one vector
473     CeedScalar dqdx[5][3];
474     for (int j=0; j<3; j++) {
475       dqdx[0][j] = drhodx[j];
476       dqdx[4][j] = dEdx[j];
477       for (int k=0; k<3; k++)
478         dqdx[k+1][j] = dUdx[k][j];
479     }
480 
481     // strong_conv = dF/dq * dq/dx    (Strong convection)
482     CeedScalar strong_conv[5] = {0};
483     for (int j=0; j<3; j++)
484       for (int k=0; k<5; k++)
485         for (int l=0; l<5; l++)
486           strong_conv[k] += jacob_F_conv[j][k][l] * dqdx[l][j];
487 
488     // Body force
489     const CeedScalar body_force[5] = {0, rho *g[0], rho *g[1], rho *g[2], 0};
490 
491     // The Physics
492     // Zero dv so all future terms can safely sum into it
493     for (int j=0; j<5; j++)
494       for (int k=0; k<3; k++)
495         dv[k][j][i] = 0;
496 
497     // -- Density
498     // ---- u rho
499     for (int j=0; j<3; j++)
500       dv[j][0][i]  += wdetJ*(rho*u[0]*dXdx[j][0] + rho*u[1]*dXdx[j][1] +
501                              rho*u[2]*dXdx[j][2]);
502     // -- Momentum
503     // ---- rho (u x u) + P I3
504     for (int j=0; j<3; j++)
505       for (int k=0; k<3; k++)
506         dv[k][j+1][i]  += wdetJ*((rho*u[j]*u[0] + (j==0?P:0))*dXdx[k][0] +
507                                  (rho*u[j]*u[1] + (j==1?P:0))*dXdx[k][1] +
508                                  (rho*u[j]*u[2] + (j==2?P:0))*dXdx[k][2]);
509     // ---- Fuvisc
510     const CeedInt Fuviscidx[3][3] = {{0, 1, 2}, {1, 3, 4}, {2, 4, 5}}; // symmetric matrix indices
511     for (int j=0; j<3; j++)
512       for (int k=0; k<3; k++)
513         dv[k][j+1][i] -= wdetJ*(Fu[Fuviscidx[j][0]]*dXdx[k][0] +
514                                 Fu[Fuviscidx[j][1]]*dXdx[k][1] +
515                                 Fu[Fuviscidx[j][2]]*dXdx[k][2]);
516     // -- Total Energy Density
517     // ---- (E + P) u
518     for (int j=0; j<3; j++)
519       dv[j][4][i]  += wdetJ * (E + P) * (u[0]*dXdx[j][0] + u[1]*dXdx[j][1] +
520                                          u[2]*dXdx[j][2]);
521     // ---- Fevisc
522     for (int j=0; j<3; j++)
523       dv[j][4][i] -= wdetJ * (Fe[0]*dXdx[j][0] + Fe[1]*dXdx[j][1] +
524                               Fe[2]*dXdx[j][2]);
525     // Body Force
526     for (int j=0; j<5; j++)
527       v[j][i] = wdetJ * body_force[j];
528 
529     // Spatial Stabilization
530     // -- Not used in favor of diagonal tau. Kept for future testing
531     // const CeedScalar sound_speed = sqrt(gamma * P / rho);
532     // CeedScalar Tau_x[3] = {0.};
533     // Tau_spatial(Tau_x, dXdx, u, sound_speed, context->c_tau, mu);
534 
535     // -- Stabilization method: none, SU, or SUPG
536     CeedScalar stab[5][3] = {{0.}};
537     CeedScalar tau_strong_conv[5] = {0.}, tau_strong_conv_conservative[5] = {0};
538     CeedScalar Tau_d[3] = {0.};
539     switch (context->stabilization) {
540     case STAB_NONE:        // Galerkin
541       break;
542     case STAB_SU:        // SU
543       Tau_diagPrim(Tau_d, dXdx, u, cv, context, mu, dt, rho);
544       tau_strong_conv[0] = Tau_d[0] * strong_conv[0];
545       tau_strong_conv[1] = Tau_d[1] * strong_conv[1];
546       tau_strong_conv[2] = Tau_d[1] * strong_conv[2];
547       tau_strong_conv[3] = Tau_d[1] * strong_conv[3];
548       tau_strong_conv[4] = Tau_d[2] * strong_conv[4];
549       PrimitiveToConservative_fwd(rho, u, E, Rd, cv, tau_strong_conv,
550                                   tau_strong_conv_conservative);
551       for (int j=0; j<3; j++)
552         for (int k=0; k<5; k++)
553           for (int l=0; l<5; l++)
554             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l];
555 
556       for (int j=0; j<5; j++)
557         for (int k=0; k<3; k++)
558           dv[k][j][i] -= wdetJ*(stab[j][0] * dXdx[k][0] +
559                                 stab[j][1] * dXdx[k][1] +
560                                 stab[j][2] * dXdx[k][2]);
561       break;
562     case STAB_SUPG:        // SUPG is not implemented for explicit scheme
563       break;
564     }
565 
566   } // End Quadrature Point Loop
567 
568   // Return
569   return 0;
570 }
571 
572 // *****************************************************************************
573 // This QFunction implements the Navier-Stokes equations (mentioned above) with
574 //   implicit time stepping method
575 //
576 //  SU   = Galerkin + grad(v) . ( Ai^T * Tau * (Aj q,j) )
577 //  SUPG = Galerkin + grad(v) . ( Ai^T * Tau * (q_dot + Aj q,j - body force) )
578 //                                       (diffussive terms will be added later)
579 //
580 // *****************************************************************************
581 CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q,
582                                     const CeedScalar *const *in,
583                                     CeedScalar *const *out) {
584   // *INDENT-OFF*
585   // Inputs
586   const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0],
587                    (*dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
588                    (*q_dot)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
589                    (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3],
590                    (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4];
591   // Outputs
592   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
593              (*dv)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1];
594   // *INDENT-ON*
595   // Context
596   NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
597   const CeedScalar lambda = context->lambda;
598   const CeedScalar mu     = context->mu;
599   const CeedScalar k      = context->k;
600   const CeedScalar cv     = context->cv;
601   const CeedScalar cp     = context->cp;
602   const CeedScalar *g     = context->g;
603   const CeedScalar dt     = context->dt;
604   const CeedScalar gamma  = cp / cv;
605   const CeedScalar Rd     = cp-cv;
606 
607   CeedPragmaSIMD
608   // Quadrature Point Loop
609   for (CeedInt i=0; i<Q; i++) {
610     // Setup
611     // -- Interp in
612     const CeedScalar rho        =   q[0][i];
613     const CeedScalar u[3]       =  {q[1][i] / rho,
614                                     q[2][i] / rho,
615                                     q[3][i] / rho
616                                    };
617     const CeedScalar E          =   q[4][i];
618     // -- Grad in
619     const CeedScalar drho[3]    =  {dq[0][0][i],
620                                     dq[1][0][i],
621                                     dq[2][0][i]
622                                    };
623     // *INDENT-OFF*
624     const CeedScalar dU[3][3]   = {{dq[0][1][i],
625                                     dq[1][1][i],
626                                     dq[2][1][i]},
627                                    {dq[0][2][i],
628                                     dq[1][2][i],
629                                     dq[2][2][i]},
630                                    {dq[0][3][i],
631                                     dq[1][3][i],
632                                     dq[2][3][i]}
633                                   };
634     // *INDENT-ON*
635     const CeedScalar dE[3]      =  {dq[0][4][i],
636                                     dq[1][4][i],
637                                     dq[2][4][i]
638                                    };
639     // -- Interp-to-Interp q_data
640     const CeedScalar wdetJ      =   q_data[0][i];
641     // -- Interp-to-Grad q_data
642     // ---- Inverse of change of coordinate matrix: X_i,j
643     // *INDENT-OFF*
644     const CeedScalar dXdx[3][3] = {{q_data[1][i],
645                                     q_data[2][i],
646                                     q_data[3][i]},
647                                    {q_data[4][i],
648                                     q_data[5][i],
649                                     q_data[6][i]},
650                                    {q_data[7][i],
651                                     q_data[8][i],
652                                     q_data[9][i]}
653                                   };
654     const CeedScalar x_i[3]     = {x[0][i], x[1][i], x[2][i]};
655     // *INDENT-ON*
656     // -- Grad-to-Grad q_data
657     // dU/dx
658     CeedScalar du[3][3] = {{0}};
659     CeedScalar drhodx[3] = {0};
660     CeedScalar dEdx[3] = {0};
661     CeedScalar dUdx[3][3] = {{0}};
662     CeedScalar dXdxdXdxT[3][3] = {{0}};
663     for (int j=0; j<3; j++) {
664       for (int k=0; k<3; k++) {
665         du[j][k] = (dU[j][k] - drho[k]*u[j]) / rho;
666         drhodx[j] += drho[k] * dXdx[k][j];
667         dEdx[j] += dE[k] * dXdx[k][j];
668         for (int l=0; l<3; l++) {
669           dUdx[j][k] += dU[j][l] * dXdx[l][k];
670           dXdxdXdxT[j][k] += dXdx[j][l]*dXdx[k][l];  //dXdx_j,k * dXdx_k,j
671         }
672       }
673     }
674     CeedScalar dudx[3][3] = {{0}};
675     for (int j=0; j<3; j++)
676       for (int k=0; k<3; k++)
677         for (int l=0; l<3; l++)
678           dudx[j][k] += du[j][l] * dXdx[l][k];
679     // -- grad_T
680     const CeedScalar grad_T[3]  = {(dEdx[0]/rho - E*drhodx[0]/(rho*rho) - /* *NOPAD* */
681                                     (u[0]*dudx[0][0] + u[1]*dudx[1][0] + u[2]*dudx[2][0]) + g[0])/cv,
682                                    (dEdx[1]/rho - E*drhodx[1]/(rho*rho) - /* *NOPAD* */
683                                     (u[0]*dudx[0][1] + u[1]*dudx[1][1] + u[2]*dudx[2][1]) + g[1])/cv,
684                                    (dEdx[2]/rho - E*drhodx[2]/(rho*rho) - /* *NOPAD* */
685                                     (u[0]*dudx[0][2] + u[1]*dudx[1][2] + u[2]*dudx[2][2]) + g[2])/cv
686                                   };
687     // -- Fuvisc
688     // ---- Symmetric 3x3 matrix
689     const CeedScalar Fu[6]     =  {mu*(dudx[0][0] * (2 + lambda) + /* *NOPAD* */
690                                        lambda * (dudx[1][1] + dudx[2][2])),
691                                    mu*(dudx[0][1] + dudx[1][0]), /* *NOPAD* */
692                                    mu*(dudx[0][2] + dudx[2][0]), /* *NOPAD* */
693                                    mu*(dudx[1][1] * (2 + lambda) + /* *NOPAD* */
694                                        lambda * (dudx[0][0] + dudx[2][2])),
695                                    mu*(dudx[1][2] + dudx[2][1]), /* *NOPAD* */
696                                    mu*(dudx[2][2] * (2 + lambda) + /* *NOPAD* */
697                                        lambda * (dudx[0][0] + dudx[1][1]))
698                                   };
699     // -- Fevisc
700     const CeedScalar Fe[3]     =  {u[0]*Fu[0] + u[1]*Fu[1] + u[2]*Fu[2] + /* *NOPAD* */
701                                    k*grad_T[0], /* *NOPAD* */
702                                    u[0]*Fu[1] + u[1]*Fu[3] + u[2]*Fu[4] + /* *NOPAD* */
703                                    k*grad_T[1], /* *NOPAD* */
704                                    u[0]*Fu[2] + u[1]*Fu[4] + u[2]*Fu[5] + /* *NOPAD* */
705                                    k*grad_T[2] /* *NOPAD* */
706                                   };
707     // Pressure
708     const CeedScalar
709     E_kinetic   = 0.5 * rho * (u[0]*u[0] + u[1]*u[1] + u[2]*u[2]),
710     E_potential = -rho*(g[0]*x_i[0] + g[1]*x_i[1] + g[2]*x_i[2]),
711     E_internal  = E - E_kinetic - E_potential,
712     P           = E_internal * (gamma - 1.); // P = pressure
713 
714     // jacob_F_conv[3][5][5] = dF(convective)/dq at each direction
715     CeedScalar jacob_F_conv[3][5][5] = {{{0.}}};
716     computeFluxJacobian_NS(jacob_F_conv, rho, u, E, gamma, g, x_i);
717 
718     // dqdx collects drhodx, dUdx and dEdx in one vector
719     CeedScalar dqdx[5][3];
720     for (int j=0; j<3; j++) {
721       dqdx[0][j] = drhodx[j];
722       dqdx[4][j] = dEdx[j];
723       for (int k=0; k<3; k++)
724         dqdx[k+1][j] = dUdx[k][j];
725     }
726     // strong_conv = dF/dq * dq/dx    (Strong convection)
727     CeedScalar strong_conv[5] = {0};
728     for (int j=0; j<3; j++)
729       for (int k=0; k<5; k++)
730         for (int l=0; l<5; l++)
731           strong_conv[k] += jacob_F_conv[j][k][l] * dqdx[l][j];
732 
733     // Body force
734     const CeedScalar body_force[5] = {0, rho *g[0], rho *g[1], rho *g[2], 0};
735 
736     // Strong residual
737     CeedScalar strong_res[5];
738     for (int j=0; j<5; j++)
739       strong_res[j] = q_dot[j][i] + strong_conv[j] - body_force[j];
740 
741     // The Physics
742     //-----mass matrix
743     for (int j=0; j<5; j++)
744       v[j][i] = wdetJ*q_dot[j][i];
745 
746     // Zero dv so all future terms can safely sum into it
747     for (int j=0; j<5; j++)
748       for (int k=0; k<3; k++)
749         dv[k][j][i] = 0;
750 
751     // -- Density
752     // ---- u rho
753     for (int j=0; j<3; j++)
754       dv[j][0][i]  -= wdetJ*(rho*u[0]*dXdx[j][0] + rho*u[1]*dXdx[j][1] +
755                              rho*u[2]*dXdx[j][2]);
756     // -- Momentum
757     // ---- rho (u x u) + P I3
758     for (int j=0; j<3; j++)
759       for (int k=0; k<3; k++)
760         dv[k][j+1][i]  -= wdetJ*((rho*u[j]*u[0] + (j==0?P:0))*dXdx[k][0] +
761                                  (rho*u[j]*u[1] + (j==1?P:0))*dXdx[k][1] +
762                                  (rho*u[j]*u[2] + (j==2?P:0))*dXdx[k][2]);
763     // ---- Fuvisc
764     const CeedInt Fuviscidx[3][3] = {{0, 1, 2}, {1, 3, 4}, {2, 4, 5}}; // symmetric matrix indices
765     for (int j=0; j<3; j++)
766       for (int k=0; k<3; k++)
767         dv[k][j+1][i] += wdetJ*(Fu[Fuviscidx[j][0]]*dXdx[k][0] +
768                                 Fu[Fuviscidx[j][1]]*dXdx[k][1] +
769                                 Fu[Fuviscidx[j][2]]*dXdx[k][2]);
770     // -- Total Energy Density
771     // ---- (E + P) u
772     for (int j=0; j<3; j++)
773       dv[j][4][i]  -= wdetJ * (E + P) * (u[0]*dXdx[j][0] + u[1]*dXdx[j][1] +
774                                          u[2]*dXdx[j][2]);
775     // ---- Fevisc
776     for (int j=0; j<3; j++)
777       dv[j][4][i] += wdetJ * (Fe[0]*dXdx[j][0] + Fe[1]*dXdx[j][1] +
778                               Fe[2]*dXdx[j][2]);
779     // Body Force
780     for (int j=0; j<5; j++)
781       v[j][i] -= wdetJ*body_force[j];
782 
783     // Spatial Stabilization
784     // -- Not used in favor of diagonal tau. Kept for future testing
785     // const CeedScalar sound_speed = sqrt(gamma * P / rho);
786     // CeedScalar Tau_x[3] = {0.};
787     // Tau_spatial(Tau_x, dXdx, u, sound_speed, c_tau, mu);
788 
789     // -- Stabilization method: none, SU, or SUPG
790     CeedScalar stab[5][3] = {{0.}};
791     CeedScalar tau_strong_res[5] = {0.}, tau_strong_res_conservative[5] = {0};
792     CeedScalar tau_strong_conv[5] = {0.}, tau_strong_conv_conservative[5] = {0};
793     CeedScalar Tau_d[3] = {0.};
794     switch (context->stabilization) {
795     case STAB_NONE:        // Galerkin
796       break;
797     case STAB_SU:        // SU
798       Tau_diagPrim(Tau_d, dXdx, u, cv, context, mu, dt, rho);
799       tau_strong_conv[0] = Tau_d[0] * strong_conv[0];
800       tau_strong_conv[1] = Tau_d[1] * strong_conv[1];
801       tau_strong_conv[2] = Tau_d[1] * strong_conv[2];
802       tau_strong_conv[3] = Tau_d[1] * strong_conv[3];
803       tau_strong_conv[4] = Tau_d[2] * strong_conv[4];
804       PrimitiveToConservative_fwd(rho, u, E, Rd, cv, tau_strong_conv,
805                                   tau_strong_conv_conservative);
806       for (int j=0; j<3; j++)
807         for (int k=0; k<5; k++)
808           for (int l=0; l<5; l++)
809             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l];
810 
811       for (int j=0; j<5; j++)
812         for (int k=0; k<3; k++)
813           dv[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
814                                 stab[j][1] * dXdx[k][1] +
815                                 stab[j][2] * dXdx[k][2]);
816       break;
817     case STAB_SUPG:        // SUPG
818       Tau_diagPrim(Tau_d, dXdx, u, cv, context, mu, dt, rho);
819       tau_strong_res[0] = Tau_d[0] * strong_res[0];
820       tau_strong_res[1] = Tau_d[1] * strong_res[1];
821       tau_strong_res[2] = Tau_d[1] * strong_res[2];
822       tau_strong_res[3] = Tau_d[1] * strong_res[3];
823       tau_strong_res[4] = Tau_d[2] * strong_res[4];
824 // Alternate route (useful later with primitive variable code)
825 // this function was verified against PHASTA for as IC that was as close as possible
826 //    computeFluxJacobian_NSp(jacob_F_conv_p, rho, u, E, Rd, cv);
827 // it has also been verified to compute a correct through the following
828 //   stab[k][j] += jacob_F_conv_p[j][k][l] * tau_strong_res[l] // flux Jacobian wrt primitive
829 // applied in the triple loop below
830 //  However, it is more flops than using the existing Jacobian wrt q after q_{,Y} viz
831       PrimitiveToConservative_fwd(rho, u, E, Rd, cv, tau_strong_res,
832                                   tau_strong_res_conservative);
833       for (int j=0; j<3; j++)
834         for (int k=0; k<5; k++)
835           for (int l=0; l<5; l++)
836             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_res_conservative[l];
837 
838       for (int j=0; j<5; j++)
839         for (int k=0; k<3; k++)
840           dv[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
841                                 stab[j][1] * dXdx[k][1] +
842                                 stab[j][2] * dXdx[k][2]);
843       break;
844     }
845 
846   } // End Quadrature Point Loop
847 
848   // Return
849   return 0;
850 }
851 // *****************************************************************************
852 #endif // newtonian_h
853