xref: /petsc/src/ts/utils/dmplexlandau/land_tensors.h (revision e0eea49528ddcce32a775a862994a11c74fcac87)
1*e0eea495SMark #define LANDAU_INVSQRT(q) (1./PetscSqrtReal(q))
2*e0eea495SMark #define LANDAU_SQRT(q) PetscSqrtReal(q)
3*e0eea495SMark 
4*e0eea495SMark /* elliptic functions
5*e0eea495SMark  */
6*e0eea495SMark PETSC_DEVICE_FUNC_DECL PetscReal polevl_10(PetscReal x, PetscReal coef[])
7*e0eea495SMark {
8*e0eea495SMark   PetscReal ans;
9*e0eea495SMark   PetscInt  i;
10*e0eea495SMark   ans = coef[0];
11*e0eea495SMark   for (i=1; i<11; i++) ans = ans * x + coef[i];
12*e0eea495SMark   return(ans);
13*e0eea495SMark }
14*e0eea495SMark PETSC_DEVICE_FUNC_DECL PetscReal polevl_9(PetscReal x, PetscReal coef[])
15*e0eea495SMark {
16*e0eea495SMark   PetscReal ans;
17*e0eea495SMark   PetscInt  i;
18*e0eea495SMark   ans = coef[0];
19*e0eea495SMark   for (i=1; i<10; i++) ans = ans * x + coef[i];
20*e0eea495SMark   return(ans);
21*e0eea495SMark }
22*e0eea495SMark /*
23*e0eea495SMark  *      Complete elliptic integral of the second kind
24*e0eea495SMark  */
25*e0eea495SMark PETSC_DEVICE_FUNC_DECL void ellipticE(PetscReal x,PetscReal *ret)
26*e0eea495SMark {
27*e0eea495SMark #if defined(PETSC_USE_REAL_SINGLE)
28*e0eea495SMark   static PetscReal P2[] = {
29*e0eea495SMark     1.53552577301013293365E-4F,
30*e0eea495SMark     2.50888492163602060990E-3F,
31*e0eea495SMark     8.68786816565889628429E-3F,
32*e0eea495SMark     1.07350949056076193403E-2F,
33*e0eea495SMark     7.77395492516787092951E-3F,
34*e0eea495SMark     7.58395289413514708519E-3F,
35*e0eea495SMark     1.15688436810574127319E-2F,
36*e0eea495SMark     2.18317996015557253103E-2F,
37*e0eea495SMark     5.68051945617860553470E-2F,
38*e0eea495SMark     4.43147180560990850618E-1F,
39*e0eea495SMark     1.00000000000000000299E0F
40*e0eea495SMark   };
41*e0eea495SMark   static PetscReal Q2[] = {
42*e0eea495SMark     3.27954898576485872656E-5F,
43*e0eea495SMark     1.00962792679356715133E-3F,
44*e0eea495SMark     6.50609489976927491433E-3F,
45*e0eea495SMark     1.68862163993311317300E-2F,
46*e0eea495SMark     2.61769742454493659583E-2F,
47*e0eea495SMark     3.34833904888224918614E-2F,
48*e0eea495SMark     4.27180926518931511717E-2F,
49*e0eea495SMark     5.85936634471101055642E-2F,
50*e0eea495SMark     9.37499997197644278445E-2F,
51*e0eea495SMark     2.49999999999888314361E-1F
52*e0eea495SMark   };
53*e0eea495SMark #else
54*e0eea495SMark   static PetscReal P2[] = {
55*e0eea495SMark     1.53552577301013293365E-4,
56*e0eea495SMark     2.50888492163602060990E-3,
57*e0eea495SMark     8.68786816565889628429E-3,
58*e0eea495SMark     1.07350949056076193403E-2,
59*e0eea495SMark     7.77395492516787092951E-3,
60*e0eea495SMark     7.58395289413514708519E-3,
61*e0eea495SMark     1.15688436810574127319E-2,
62*e0eea495SMark     2.18317996015557253103E-2,
63*e0eea495SMark     5.68051945617860553470E-2,
64*e0eea495SMark     4.43147180560990850618E-1,
65*e0eea495SMark     1.00000000000000000299E0
66*e0eea495SMark   };
67*e0eea495SMark   static PetscReal Q2[] = {
68*e0eea495SMark     3.27954898576485872656E-5,
69*e0eea495SMark     1.00962792679356715133E-3,
70*e0eea495SMark     6.50609489976927491433E-3,
71*e0eea495SMark     1.68862163993311317300E-2,
72*e0eea495SMark     2.61769742454493659583E-2,
73*e0eea495SMark     3.34833904888224918614E-2,
74*e0eea495SMark     4.27180926518931511717E-2,
75*e0eea495SMark     5.85936634471101055642E-2,
76*e0eea495SMark     9.37499997197644278445E-2,
77*e0eea495SMark     2.49999999999888314361E-1
78*e0eea495SMark   };
79*e0eea495SMark #endif
80*e0eea495SMark   x = 1 - x; /* where m = 1 - m1 */
81*e0eea495SMark   *ret = polevl_10(x,P2) - PetscLogReal(x) * (x * polevl_9(x,Q2));
82*e0eea495SMark }
83*e0eea495SMark /*
84*e0eea495SMark  *      Complete elliptic integral of the first kind
85*e0eea495SMark  */
86*e0eea495SMark PETSC_DEVICE_FUNC_DECL void ellipticK(PetscReal x,PetscReal *ret)
87*e0eea495SMark {
88*e0eea495SMark #if defined(PETSC_USE_REAL_SINGLE)
89*e0eea495SMark   static PetscReal P1[] =
90*e0eea495SMark     {
91*e0eea495SMark       1.37982864606273237150E-4F,
92*e0eea495SMark       2.28025724005875567385E-3F,
93*e0eea495SMark       7.97404013220415179367E-3F,
94*e0eea495SMark       9.85821379021226008714E-3F,
95*e0eea495SMark       6.87489687449949877925E-3F,
96*e0eea495SMark       6.18901033637687613229E-3F,
97*e0eea495SMark       8.79078273952743772254E-3F,
98*e0eea495SMark       1.49380448916805252718E-2F,
99*e0eea495SMark       3.08851465246711995998E-2F,
100*e0eea495SMark       9.65735902811690126535E-2F,
101*e0eea495SMark       1.38629436111989062502E0F
102*e0eea495SMark     };
103*e0eea495SMark   static PetscReal Q1[] =
104*e0eea495SMark     {
105*e0eea495SMark       2.94078955048598507511E-5F,
106*e0eea495SMark       9.14184723865917226571E-4F,
107*e0eea495SMark       5.94058303753167793257E-3F,
108*e0eea495SMark       1.54850516649762399335E-2F,
109*e0eea495SMark       2.39089602715924892727E-2F,
110*e0eea495SMark       3.01204715227604046988E-2F,
111*e0eea495SMark       3.73774314173823228969E-2F,
112*e0eea495SMark       4.88280347570998239232E-2F,
113*e0eea495SMark       7.03124996963957469739E-2F,
114*e0eea495SMark       1.24999999999870820058E-1F,
115*e0eea495SMark       4.99999999999999999821E-1F
116*e0eea495SMark     };
117*e0eea495SMark #else
118*e0eea495SMark   static PetscReal P1[] =
119*e0eea495SMark     {
120*e0eea495SMark       1.37982864606273237150E-4,
121*e0eea495SMark       2.28025724005875567385E-3,
122*e0eea495SMark       7.97404013220415179367E-3,
123*e0eea495SMark       9.85821379021226008714E-3,
124*e0eea495SMark       6.87489687449949877925E-3,
125*e0eea495SMark       6.18901033637687613229E-3,
126*e0eea495SMark       8.79078273952743772254E-3,
127*e0eea495SMark       1.49380448916805252718E-2,
128*e0eea495SMark       3.08851465246711995998E-2,
129*e0eea495SMark       9.65735902811690126535E-2,
130*e0eea495SMark       1.38629436111989062502E0
131*e0eea495SMark     };
132*e0eea495SMark   static PetscReal Q1[] =
133*e0eea495SMark     {
134*e0eea495SMark       2.94078955048598507511E-5,
135*e0eea495SMark       9.14184723865917226571E-4,
136*e0eea495SMark       5.94058303753167793257E-3,
137*e0eea495SMark       1.54850516649762399335E-2,
138*e0eea495SMark       2.39089602715924892727E-2,
139*e0eea495SMark       3.01204715227604046988E-2,
140*e0eea495SMark       3.73774314173823228969E-2,
141*e0eea495SMark       4.88280347570998239232E-2,
142*e0eea495SMark       7.03124996963957469739E-2,
143*e0eea495SMark       1.24999999999870820058E-1,
144*e0eea495SMark       4.99999999999999999821E-1
145*e0eea495SMark     };
146*e0eea495SMark #endif
147*e0eea495SMark   x = 1 - x; /* where m = 1 - m1 */
148*e0eea495SMark   *ret = polevl_10(x,P1) - PetscLogReal(x) * polevl_10(x,Q1);
149*e0eea495SMark }
150*e0eea495SMark 
151*e0eea495SMark 
152*e0eea495SMark /* integration point functions */
153*e0eea495SMark /* Evaluates the tensor U=(I-(x-y)(x-y)/(x-y)^2)/|x-y| at point x,y */
154*e0eea495SMark /* if x==y we will return zero. This is not the correct result */
155*e0eea495SMark /* since the tensor diverges for x==y but when integrated */
156*e0eea495SMark /* the divergent part is antisymmetric and vanishes. This is not  */
157*e0eea495SMark /* trivial, but can be proven. */
158*e0eea495SMark #if LANDAU_DIM==3
159*e0eea495SMark PETSC_DEVICE_FUNC_DECL void LandauTensor3D(const PetscReal x1[], const PetscReal xp, const PetscReal yp, const PetscReal zp, PetscReal U[][3], PetscReal mask)
160*e0eea495SMark {
161*e0eea495SMark   PetscReal dx[3],inorm3,inorm,inorm2,norm2,x2[] = {xp,yp,zp};
162*e0eea495SMark   PetscInt  d;
163*e0eea495SMark   for (d = 0, norm2 = PETSC_MACHINE_EPSILON; d < 3; ++d) {
164*e0eea495SMark     dx[d] = x2[d] - x1[d];
165*e0eea495SMark     norm2 += dx[d] * dx[d];
166*e0eea495SMark   }
167*e0eea495SMark   inorm2 = mask/norm2;
168*e0eea495SMark   inorm = LANDAU_SQRT(inorm2);
169*e0eea495SMark   inorm3 = inorm2*inorm;
170*e0eea495SMark   for (d = 0; d < 3; ++d) U[d][d] = inorm - inorm3 * dx[d] * dx[d];
171*e0eea495SMark   U[1][0] = U[0][1] = -inorm3 * dx[0] * dx[1];
172*e0eea495SMark   U[1][2] = U[2][1] = -inorm3 * dx[2] * dx[1];
173*e0eea495SMark   U[2][0] = U[0][2] = -inorm3 * dx[0] * dx[2];
174*e0eea495SMark }
175*e0eea495SMark #else
176*e0eea495SMark PETSC_DEVICE_FUNC_DECL void LandauTensor2D(const PetscReal x[], const PetscReal rp, const PetscReal zp, PetscReal Ud[][2], PetscReal Uk[][2], const PetscReal mask)
177*e0eea495SMark {
178*e0eea495SMark   PetscReal l,s,r=x[0],z=x[1],i1func,i2func,i3func,ks,es,pi4pow,sqrt_1s,r2,rp2,r2prp2,zmzp,zmzp2,tt;
179*e0eea495SMark   //PetscReal mask /* = !!(r!=rp || z!=zp) */;
180*e0eea495SMark   /* !!(zmzp2 > 1.e-12 || (r-rp) >  1.e-12 || (r-rp) < -1.e-12); */
181*e0eea495SMark   r2=PetscSqr(r);
182*e0eea495SMark   zmzp=z-zp;
183*e0eea495SMark   rp2=PetscSqr(rp);
184*e0eea495SMark   zmzp2=PetscSqr(zmzp);
185*e0eea495SMark   r2prp2=r2+rp2;
186*e0eea495SMark   l = r2 + rp2 + zmzp2;
187*e0eea495SMark   /* if      (zmzp2 >  PETSC_SMALL) mask = 1; */
188*e0eea495SMark   /* else if ((tt=(r-rp)) >  PETSC_SMALL) mask = 1; */
189*e0eea495SMark   /* else if  (tt         < -PETSC_SMALL) mask = 1; */
190*e0eea495SMark   /* else mask = 0; */
191*e0eea495SMark   s = mask*2*r*rp/l; /* mask for vectorization */
192*e0eea495SMark   tt = 1./(1+s);
193*e0eea495SMark   pi4pow = 4*PETSC_PI*LANDAU_INVSQRT(PetscSqr(l)*l);
194*e0eea495SMark   sqrt_1s = LANDAU_SQRT(1.+s);
195*e0eea495SMark   /* sp.ellipe(2.*s/(1.+s)) */
196*e0eea495SMark   ellipticE(2*s*tt,&es); /* 44 flops * 2 + 75 = 163 flops including 2 logs, 1 sqrt, 1 pow, 21 mult */
197*e0eea495SMark   /* sp.ellipk(2.*s/(1.+s)) */
198*e0eea495SMark   ellipticK(2*s*tt,&ks); /* 44 flops + 75 in rest, 21 mult */
199*e0eea495SMark   /* mask is needed here just for single precision */
200*e0eea495SMark   i2func = 2./((1-s)*sqrt_1s) * es;
201*e0eea495SMark   i1func = 4./(PetscSqr(s)*sqrt_1s + PETSC_MACHINE_EPSILON) * mask * (ks - (1.+s) * es);
202*e0eea495SMark   i3func = 2./((1-s)*(s)*sqrt_1s + PETSC_MACHINE_EPSILON) * (es - (1-s) * ks);
203*e0eea495SMark   Ud[0][0]=                    pi4pow*(rp2*i1func+PetscSqr(zmzp)*i2func);
204*e0eea495SMark   Ud[0][1]=Ud[1][0]=Uk[0][1]= -pi4pow*(zmzp)*(r*i2func-rp*i3func);
205*e0eea495SMark   Uk[1][1]=Ud[1][1]=           pi4pow*((r2prp2)*i2func-2*r*rp*i3func)*mask;
206*e0eea495SMark   Uk[0][0]=                    pi4pow*(zmzp2*i3func+r*rp*i1func);
207*e0eea495SMark   Uk[1][0]=                   -pi4pow*(zmzp)*(r*i3func-rp*i2func); /* 48 mults + 21 + 21 = 90 mults and divs */
208*e0eea495SMark }
209*e0eea495SMark #endif
210