xref: /petsc/src/ts/utils/dmplexlandau/land_tensors.h (revision 6d8694c4fbab79f9439f1ad13c0386ba7ee1ca4b)
1a4963045SJacob Faibussowitsch #pragma once
23ea99036SJacob Faibussowitsch 
3e0eea495SMark #define LANDAU_INVSQRT(q) (1. / PetscSqrtReal(q))
4e0eea495SMark 
5042217e8SBarry Smith #if defined(__CUDA_ARCH__)
6042217e8SBarry Smith   #define PETSC_DEVICE_FUNC_DECL __device__
7042217e8SBarry Smith #elif defined(KOKKOS_INLINE_FUNCTION)
8042217e8SBarry Smith   #define PETSC_DEVICE_FUNC_DECL KOKKOS_INLINE_FUNCTION
9042217e8SBarry Smith #else
1066976f2fSJacob Faibussowitsch   #define PETSC_DEVICE_FUNC_DECL
11042217e8SBarry Smith #endif
12042217e8SBarry Smith 
1352cdd6eaSMark #if LANDAU_DIM == 2
14e0eea495SMark /* elliptic functions
15e0eea495SMark  */
polevl_10(PetscReal x,const PetscReal coef[])16*ce78bad3SBarry Smith PETSC_DEVICE_FUNC_DECL static PetscReal polevl_10(PetscReal x, const PetscReal coef[])
17d71ae5a4SJacob Faibussowitsch {
18e0eea495SMark   PetscReal ans;
19e0eea495SMark   PetscInt  i;
20e0eea495SMark   ans = coef[0];
21e0eea495SMark   for (i = 1; i < 11; i++) ans = ans * x + coef[i];
224ad8454bSPierre Jolivet   return ans;
23e0eea495SMark }
polevl_9(PetscReal x,const PetscReal coef[])24*ce78bad3SBarry Smith PETSC_DEVICE_FUNC_DECL static PetscReal polevl_9(PetscReal x, const PetscReal coef[])
25d71ae5a4SJacob Faibussowitsch {
26e0eea495SMark   PetscReal ans;
27e0eea495SMark   PetscInt  i;
28e0eea495SMark   ans = coef[0];
29e0eea495SMark   for (i = 1; i < 10; i++) ans = ans * x + coef[i];
304ad8454bSPierre Jolivet   return ans;
31e0eea495SMark }
32e0eea495SMark /*
33e0eea495SMark  *      Complete elliptic integral of the second kind
34e0eea495SMark  */
ellipticE(PetscReal x,PetscReal * ret)35*ce78bad3SBarry Smith PETSC_DEVICE_FUNC_DECL static void ellipticE(PetscReal x, PetscReal *ret)
36d71ae5a4SJacob Faibussowitsch {
37e0eea495SMark   #if defined(PETSC_USE_REAL_SINGLE)
389371c9d4SSatish Balay   static const PetscReal P2[] = {1.53552577301013293365E-4F, 2.50888492163602060990E-3F, 8.68786816565889628429E-3F, 1.07350949056076193403E-2F, 7.77395492516787092951E-3F, 7.58395289413514708519E-3F,
399371c9d4SSatish Balay                                  1.15688436810574127319E-2F, 2.18317996015557253103E-2F, 5.68051945617860553470E-2F, 4.43147180560990850618E-1F, 1.00000000000000000299E0F};
409371c9d4SSatish Balay   static const PetscReal Q2[] = {3.27954898576485872656E-5F, 1.00962792679356715133E-3F, 6.50609489976927491433E-3F, 1.68862163993311317300E-2F, 2.61769742454493659583E-2F,
419371c9d4SSatish Balay                                  3.34833904888224918614E-2F, 4.27180926518931511717E-2F, 5.85936634471101055642E-2F, 9.37499997197644278445E-2F, 2.49999999999888314361E-1F};
42e0eea495SMark   #else
439371c9d4SSatish Balay   static const PetscReal P2[] = {1.53552577301013293365E-4, 2.50888492163602060990E-3, 8.68786816565889628429E-3, 1.07350949056076193403E-2, 7.77395492516787092951E-3, 7.58395289413514708519E-3,
449371c9d4SSatish Balay                                  1.15688436810574127319E-2, 2.18317996015557253103E-2, 5.68051945617860553470E-2, 4.43147180560990850618E-1, 1.00000000000000000299E0};
459371c9d4SSatish Balay   static const PetscReal Q2[] = {3.27954898576485872656E-5, 1.00962792679356715133E-3, 6.50609489976927491433E-3, 1.68862163993311317300E-2, 2.61769742454493659583E-2,
469371c9d4SSatish Balay                                  3.34833904888224918614E-2, 4.27180926518931511717E-2, 5.85936634471101055642E-2, 9.37499997197644278445E-2, 2.49999999999888314361E-1};
47e0eea495SMark   #endif
48e0eea495SMark   x    = 1 - x; /* where m = 1 - m1 */
49e0eea495SMark   *ret = polevl_10(x, P2) - PetscLogReal(x) * (x * polevl_9(x, Q2));
50e0eea495SMark }
51e0eea495SMark /*
52e0eea495SMark  *      Complete elliptic integral of the first kind
53e0eea495SMark  */
ellipticK(PetscReal x,PetscReal * ret)54*ce78bad3SBarry Smith PETSC_DEVICE_FUNC_DECL static void ellipticK(PetscReal x, PetscReal *ret)
55d71ae5a4SJacob Faibussowitsch {
56e0eea495SMark   #if defined(PETSC_USE_REAL_SINGLE)
579371c9d4SSatish Balay   static const PetscReal P1[] = {1.37982864606273237150E-4F, 2.28025724005875567385E-3F, 7.97404013220415179367E-3F, 9.85821379021226008714E-3F, 6.87489687449949877925E-3F, 6.18901033637687613229E-3F,
589371c9d4SSatish Balay                                  8.79078273952743772254E-3F, 1.49380448916805252718E-2F, 3.08851465246711995998E-2F, 9.65735902811690126535E-2F, 1.38629436111989062502E0F};
599371c9d4SSatish Balay   static const PetscReal Q1[] = {2.94078955048598507511E-5F, 9.14184723865917226571E-4F, 5.94058303753167793257E-3F, 1.54850516649762399335E-2F, 2.39089602715924892727E-2F, 3.01204715227604046988E-2F,
609371c9d4SSatish Balay                                  3.73774314173823228969E-2F, 4.88280347570998239232E-2F, 7.03124996963957469739E-2F, 1.24999999999870820058E-1F, 4.99999999999999999821E-1F};
61e0eea495SMark   #else
629371c9d4SSatish Balay   static const PetscReal P1[] = {1.37982864606273237150E-4, 2.28025724005875567385E-3, 7.97404013220415179367E-3, 9.85821379021226008714E-3, 6.87489687449949877925E-3, 6.18901033637687613229E-3,
639371c9d4SSatish Balay                                  8.79078273952743772254E-3, 1.49380448916805252718E-2, 3.08851465246711995998E-2, 9.65735902811690126535E-2, 1.38629436111989062502E0};
649371c9d4SSatish Balay   static const PetscReal Q1[] = {2.94078955048598507511E-5, 9.14184723865917226571E-4, 5.94058303753167793257E-3, 1.54850516649762399335E-2, 2.39089602715924892727E-2, 3.01204715227604046988E-2,
659371c9d4SSatish Balay                                  3.73774314173823228969E-2, 4.88280347570998239232E-2, 7.03124996963957469739E-2, 1.24999999999870820058E-1, 4.99999999999999999821E-1};
66e0eea495SMark   #endif
67e0eea495SMark   x    = 1 - x; /* where m = 1 - m1 */
68e0eea495SMark   *ret = polevl_10(x, P1) - PetscLogReal(x) * polevl_10(x, Q1);
69e0eea495SMark }
70a587d139SMark /* flip sign. papers use du/dt = C, PETSc uses form G(u) = du/dt - C(u) = 0 */
LandauTensor2D(const PetscReal x[],const PetscReal rp,const PetscReal zp,PetscReal Ud[][2],PetscReal Uk[][2],const PetscReal mask)71*ce78bad3SBarry Smith PETSC_DEVICE_FUNC_DECL static void LandauTensor2D(const PetscReal x[], const PetscReal rp, const PetscReal zp, PetscReal Ud[][2], PetscReal Uk[][2], const PetscReal mask)
72d71ae5a4SJacob Faibussowitsch {
73e0eea495SMark   PetscReal l, s, r = x[0], z = x[1], i1func, i2func, i3func, ks, es, pi4pow, sqrt_1s, r2, rp2, r2prp2, zmzp, zmzp2, tt;
74e0eea495SMark   //PetscReal mask /* = !!(r!=rp || z!=zp) */;
75e0eea495SMark   /* !!(zmzp2 > 1.e-12 || (r-rp) >  1.e-12 || (r-rp) < -1.e-12); */
76e0eea495SMark   r2     = PetscSqr(r);
77e0eea495SMark   zmzp   = z - zp;
78e0eea495SMark   rp2    = PetscSqr(rp);
79e0eea495SMark   zmzp2  = PetscSqr(zmzp);
80e0eea495SMark   r2prp2 = r2 + rp2;
81e0eea495SMark   l      = r2 + rp2 + zmzp2;
82e0eea495SMark   /* if      (zmzp2 >  PETSC_SMALL) mask = 1; */
83e0eea495SMark   /* else if ((tt=(r-rp)) >  PETSC_SMALL) mask = 1; */
84e0eea495SMark   /* else if  (tt         < -PETSC_SMALL) mask = 1; */
85e0eea495SMark   /* else mask = 0; */
86e0eea495SMark   s       = mask * 2 * r * rp / l; /* mask for vectorization */
87e0eea495SMark   tt      = 1. / (1 + s);
88e0eea495SMark   pi4pow  = 4 * PETSC_PI * LANDAU_INVSQRT(PetscSqr(l) * l);
89cefb98e8SMark Adams   sqrt_1s = PetscSqrtReal(1. + s);
90e0eea495SMark   /* sp.ellipe(2.*s/(1.+s)) */
91e0eea495SMark   ellipticE(2 * s * tt, &es); /* 44 flops * 2 + 75 = 163 flops including 2 logs, 1 sqrt, 1 pow, 21 mult */
92e0eea495SMark   /* sp.ellipk(2.*s/(1.+s)) */
93e0eea495SMark   ellipticK(2 * s * tt, &ks); /* 44 flops + 75 in rest, 21 mult */
94e0eea495SMark   /* mask is needed here just for single precision */
95e0eea495SMark   i2func   = 2. / ((1 - s) * sqrt_1s) * es;
96e0eea495SMark   i1func   = 4. / (PetscSqr(s) * sqrt_1s + PETSC_MACHINE_EPSILON) * mask * (ks - (1. + s) * es);
97e0eea495SMark   i3func   = 2. / ((1 - s) * (s)*sqrt_1s + PETSC_MACHINE_EPSILON) * (es - (1 - s) * ks);
98a587d139SMark   Ud[0][0] = -pi4pow * (rp2 * i1func + PetscSqr(zmzp) * i2func);
99a587d139SMark   Ud[0][1] = Ud[1][0] = Uk[0][1] = pi4pow * (zmzp) * (r * i2func - rp * i3func);
100a587d139SMark   Uk[1][1] = Ud[1][1] = -pi4pow * ((r2prp2)*i2func - 2 * r * rp * i3func) * mask;
101a587d139SMark   Uk[0][0]            = -pi4pow * (zmzp2 * i3func + r * rp * i1func);
102a587d139SMark   Uk[1][0]            = pi4pow * (zmzp) * (r * i3func - rp * i2func); /* 48 mults + 21 + 21 = 90 mults and divs */
103e0eea495SMark }
10452cdd6eaSMark #else
10552cdd6eaSMark /* integration point functions */
10652cdd6eaSMark /* Evaluates the tensor U=(I-(x-y)(x-y)/(x-y)^2)/|x-y| at point x,y */
10752cdd6eaSMark /* if x==y we will return zero. This is not the correct result */
10852cdd6eaSMark /* since the tensor diverges for x==y but when integrated */
10952cdd6eaSMark /* the divergent part is antisymmetric and vanishes. This is not  */
11052cdd6eaSMark /* trivial, but can be proven. */
LandauTensor3D(const PetscReal x1[],const PetscReal xp,const PetscReal yp,const PetscReal zp,PetscReal U[][3],PetscReal mask)11166976f2fSJacob Faibussowitsch static PETSC_DEVICE_FUNC_DECL void LandauTensor3D(const PetscReal x1[], const PetscReal xp, const PetscReal yp, const PetscReal zp, PetscReal U[][3], PetscReal mask)
112d71ae5a4SJacob Faibussowitsch {
11352cdd6eaSMark   PetscReal dx[3], inorm3, inorm, inorm2, norm2, x2[] = {xp, yp, zp};
11452cdd6eaSMark   PetscInt  d;
11552cdd6eaSMark   for (d = 0, norm2 = PETSC_MACHINE_EPSILON; d < 3; ++d) {
11652cdd6eaSMark     dx[d] = x2[d] - x1[d];
11752cdd6eaSMark     norm2 += dx[d] * dx[d];
11852cdd6eaSMark   }
11952cdd6eaSMark   inorm2 = mask / norm2;
120cefb98e8SMark Adams   inorm  = PetscSqrtReal(inorm2);
12152cdd6eaSMark   inorm3 = inorm2 * inorm;
122bcff154bSMark Adams   for (d = 0; d < 3; ++d) U[d][d] = -(inorm - inorm3 * dx[d] * dx[d]);
123a587d139SMark   U[1][0] = U[0][1] = inorm3 * dx[0] * dx[1];
124a587d139SMark   U[1][2] = U[2][1] = inorm3 * dx[2] * dx[1];
125a587d139SMark   U[2][0] = U[0][2] = inorm3 * dx[0] * dx[2];
12652cdd6eaSMark }
127cefb98e8SMark Adams   /* Relativistic form */
128cefb98e8SMark Adams   #define GAMMA3(_x, _c02) PetscSqrtReal(1.0 + ((_x[0] * _x[0]) + (_x[1] * _x[1]) + (_x[2] * _x[2])) / (_c02))
LandauTensor3DRelativistic(const PetscReal a_x1[],const PetscReal xp,const PetscReal yp,const PetscReal zp,PetscReal U[][3],PetscReal mask,PetscReal c0)12966976f2fSJacob Faibussowitsch static PETSC_DEVICE_FUNC_DECL void LandauTensor3DRelativistic(const PetscReal a_x1[], const PetscReal xp, const PetscReal yp, const PetscReal zp, PetscReal U[][3], PetscReal mask, PetscReal c0)
130d71ae5a4SJacob Faibussowitsch {
131c90f4043SMark Adams   const PetscReal x2[3] = {xp, yp, zp}, x1[3] = {a_x1[0], a_x1[1], a_x1[2]}, c02 = c0 * c0, g1 = GAMMA3(x1, c02), g2 = GAMMA3(x2, c02);
132cefb98e8SMark Adams   PetscReal       fact, u1u2, diff[3], udiff2, u12, u22, wsq, rsq, tt;
133cefb98e8SMark Adams   PetscInt        i, j;
134cefb98e8SMark Adams 
135cefb98e8SMark Adams   if (mask == 0.0) {
136cefb98e8SMark Adams     for (i = 0; i < 3; ++i) {
137ad540459SPierre Jolivet       for (j = 0; j < 3; ++j) U[i][j] = 0;
138cefb98e8SMark Adams     }
139cefb98e8SMark Adams   } else {
140cefb98e8SMark Adams     for (i = 0, u1u2 = u12 = u22 = udiff2 = 0; i < 3; ++i) {
141cefb98e8SMark Adams       diff[i] = x1[i] - x2[i];
142cefb98e8SMark Adams       udiff2 += diff[i] * diff[i];
143cefb98e8SMark Adams       u12 += x1[i] * x1[i];
144cefb98e8SMark Adams       u22 += x2[i] * x2[i];
145cefb98e8SMark Adams       u1u2 += x1[i] * x2[i];
146cefb98e8SMark Adams     }
147cefb98e8SMark Adams     tt  = 2. * u1u2 * (1. - g1 * g2) + (u12 * u22 + u1u2 * u1u2) / c02; // these two terms are about the same with opposite sign
148cefb98e8SMark Adams     wsq = udiff2 + tt;
149cefb98e8SMark Adams     //wsq = udiff2 + 2.*u1u2*(1.-g1*g2) + (u12*u22 + u1u2*u1u2)/c02;
150cefb98e8SMark Adams     rsq  = 1. + wsq / c02;
151cefb98e8SMark Adams     fact = -rsq / (g1 * g2 * PetscSqrtReal(wsq)); /* flip sign. papers use du/dt = C, PETSc uses form G(u) = du/dt - C(u) = 0 */
152cefb98e8SMark Adams     for (i = 0; i < 3; ++i) {
153ad540459SPierre Jolivet       for (j = 0; j < 3; ++j) U[i][j] = fact * (-diff[i] * diff[j] / wsq + (PetscSqrtReal(rsq) - 1.) * (x1[i] * x2[j] + x1[j] * x2[i]) / wsq);
154cefb98e8SMark Adams       U[i][i] += fact;
155cefb98e8SMark Adams     }
156cefb98e8SMark Adams   }
157cefb98e8SMark Adams }
158cefb98e8SMark Adams 
159e0eea495SMark #endif
160