xref: /petsc/src/ts/utils/dmplexlandau/land_tensors.h (revision 608e5a7a8a35b40c4e75a0aae313cbdeb3df9fee)
1 #define LANDAU_INVSQRT(q) (1./PetscSqrtReal(q))
2 #define LANDAU_SQRT(q) PetscSqrtReal(q)
3 
4 #if defined(__CUDA_ARCH__)
5 #define PETSC_DEVICE_FUNC_DECL __device__
6 #elif defined(KOKKOS_INLINE_FUNCTION)
7 #define PETSC_DEVICE_FUNC_DECL KOKKOS_INLINE_FUNCTION
8 #else
9 #define PETSC_DEVICE_FUNC_DECL static
10 #endif
11 
12 #if LANDAU_DIM==2
13 /* elliptic functions
14  */
15 PETSC_DEVICE_FUNC_DECL PetscReal polevl_10(PetscReal x, const PetscReal coef[])
16 {
17   PetscReal ans;
18   PetscInt  i;
19   ans = coef[0];
20   for (i=1; i<11; i++) ans = ans * x + coef[i];
21   return(ans);
22 }
23 PETSC_DEVICE_FUNC_DECL PetscReal polevl_9(PetscReal x, const PetscReal coef[])
24 {
25   PetscReal ans;
26   PetscInt  i;
27   ans = coef[0];
28   for (i=1; i<10; i++) ans = ans * x + coef[i];
29   return(ans);
30 }
31 /*
32  *      Complete elliptic integral of the second kind
33  */
34 PETSC_DEVICE_FUNC_DECL void ellipticE(PetscReal x,PetscReal *ret)
35 {
36 #if defined(PETSC_USE_REAL_SINGLE)
37   static const PetscReal P2[] = {
38     1.53552577301013293365E-4F,
39     2.50888492163602060990E-3F,
40     8.68786816565889628429E-3F,
41     1.07350949056076193403E-2F,
42     7.77395492516787092951E-3F,
43     7.58395289413514708519E-3F,
44     1.15688436810574127319E-2F,
45     2.18317996015557253103E-2F,
46     5.68051945617860553470E-2F,
47     4.43147180560990850618E-1F,
48     1.00000000000000000299E0F
49   };
50   static const PetscReal Q2[] = {
51     3.27954898576485872656E-5F,
52     1.00962792679356715133E-3F,
53     6.50609489976927491433E-3F,
54     1.68862163993311317300E-2F,
55     2.61769742454493659583E-2F,
56     3.34833904888224918614E-2F,
57     4.27180926518931511717E-2F,
58     5.85936634471101055642E-2F,
59     9.37499997197644278445E-2F,
60     2.49999999999888314361E-1F
61   };
62 #else
63   static const PetscReal P2[] = {
64     1.53552577301013293365E-4,
65     2.50888492163602060990E-3,
66     8.68786816565889628429E-3,
67     1.07350949056076193403E-2,
68     7.77395492516787092951E-3,
69     7.58395289413514708519E-3,
70     1.15688436810574127319E-2,
71     2.18317996015557253103E-2,
72     5.68051945617860553470E-2,
73     4.43147180560990850618E-1,
74     1.00000000000000000299E0
75   };
76   static const PetscReal Q2[] = {
77     3.27954898576485872656E-5,
78     1.00962792679356715133E-3,
79     6.50609489976927491433E-3,
80     1.68862163993311317300E-2,
81     2.61769742454493659583E-2,
82     3.34833904888224918614E-2,
83     4.27180926518931511717E-2,
84     5.85936634471101055642E-2,
85     9.37499997197644278445E-2,
86     2.49999999999888314361E-1
87   };
88 #endif
89   x = 1 - x; /* where m = 1 - m1 */
90   *ret = polevl_10(x,P2) - PetscLogReal(x) * (x * polevl_9(x,Q2));
91 }
92 /*
93  *      Complete elliptic integral of the first kind
94  */
95 PETSC_DEVICE_FUNC_DECL void ellipticK(PetscReal x,PetscReal *ret)
96 {
97 #if defined(PETSC_USE_REAL_SINGLE)
98   static const PetscReal P1[] =
99     {
100       1.37982864606273237150E-4F,
101       2.28025724005875567385E-3F,
102       7.97404013220415179367E-3F,
103       9.85821379021226008714E-3F,
104       6.87489687449949877925E-3F,
105       6.18901033637687613229E-3F,
106       8.79078273952743772254E-3F,
107       1.49380448916805252718E-2F,
108       3.08851465246711995998E-2F,
109       9.65735902811690126535E-2F,
110       1.38629436111989062502E0F
111     };
112   static const PetscReal Q1[] =
113     {
114       2.94078955048598507511E-5F,
115       9.14184723865917226571E-4F,
116       5.94058303753167793257E-3F,
117       1.54850516649762399335E-2F,
118       2.39089602715924892727E-2F,
119       3.01204715227604046988E-2F,
120       3.73774314173823228969E-2F,
121       4.88280347570998239232E-2F,
122       7.03124996963957469739E-2F,
123       1.24999999999870820058E-1F,
124       4.99999999999999999821E-1F
125     };
126 #else
127   static const PetscReal P1[] =
128     {
129       1.37982864606273237150E-4,
130       2.28025724005875567385E-3,
131       7.97404013220415179367E-3,
132       9.85821379021226008714E-3,
133       6.87489687449949877925E-3,
134       6.18901033637687613229E-3,
135       8.79078273952743772254E-3,
136       1.49380448916805252718E-2,
137       3.08851465246711995998E-2,
138       9.65735902811690126535E-2,
139       1.38629436111989062502E0
140     };
141   static const PetscReal Q1[] =
142     {
143       2.94078955048598507511E-5,
144       9.14184723865917226571E-4,
145       5.94058303753167793257E-3,
146       1.54850516649762399335E-2,
147       2.39089602715924892727E-2,
148       3.01204715227604046988E-2,
149       3.73774314173823228969E-2,
150       4.88280347570998239232E-2,
151       7.03124996963957469739E-2,
152       1.24999999999870820058E-1,
153       4.99999999999999999821E-1
154     };
155 #endif
156   x = 1 - x; /* where m = 1 - m1 */
157   *ret = polevl_10(x,P1) - PetscLogReal(x) * polevl_10(x,Q1);
158 }
159 /* flip sign. papers use du/dt = C, PETSc uses form G(u) = du/dt - C(u) = 0 */
160 PETSC_DEVICE_FUNC_DECL void LandauTensor2D(const PetscReal x[], const PetscReal rp, const PetscReal zp, PetscReal Ud[][2], PetscReal Uk[][2], const PetscReal mask)
161 {
162   PetscReal l,s,r=x[0],z=x[1],i1func,i2func,i3func,ks,es,pi4pow,sqrt_1s,r2,rp2,r2prp2,zmzp,zmzp2,tt;
163   //PetscReal mask /* = !!(r!=rp || z!=zp) */;
164   /* !!(zmzp2 > 1.e-12 || (r-rp) >  1.e-12 || (r-rp) < -1.e-12); */
165   r2=PetscSqr(r);
166   zmzp=z-zp;
167   rp2=PetscSqr(rp);
168   zmzp2=PetscSqr(zmzp);
169   r2prp2=r2+rp2;
170   l = r2 + rp2 + zmzp2;
171   /* if      (zmzp2 >  PETSC_SMALL) mask = 1; */
172   /* else if ((tt=(r-rp)) >  PETSC_SMALL) mask = 1; */
173   /* else if  (tt         < -PETSC_SMALL) mask = 1; */
174   /* else mask = 0; */
175   s = mask*2*r*rp/l; /* mask for vectorization */
176   tt = 1./(1+s);
177   pi4pow = 4*PETSC_PI*LANDAU_INVSQRT(PetscSqr(l)*l);
178   sqrt_1s = LANDAU_SQRT(1.+s);
179   /* sp.ellipe(2.*s/(1.+s)) */
180   ellipticE(2*s*tt,&es); /* 44 flops * 2 + 75 = 163 flops including 2 logs, 1 sqrt, 1 pow, 21 mult */
181   /* sp.ellipk(2.*s/(1.+s)) */
182   ellipticK(2*s*tt,&ks); /* 44 flops + 75 in rest, 21 mult */
183   /* mask is needed here just for single precision */
184   i2func = 2./((1-s)*sqrt_1s) * es;
185   i1func = 4./(PetscSqr(s)*sqrt_1s + PETSC_MACHINE_EPSILON) * mask * (ks - (1.+s) * es);
186   i3func = 2./((1-s)*(s)*sqrt_1s + PETSC_MACHINE_EPSILON) * (es - (1-s) * ks);
187   Ud[0][0]=                   -pi4pow*(rp2*i1func+PetscSqr(zmzp)*i2func);
188   Ud[0][1]=Ud[1][0]=Uk[0][1]=  pi4pow*(zmzp)*(r*i2func-rp*i3func);
189   Uk[1][1]=Ud[1][1]=          -pi4pow*((r2prp2)*i2func-2*r*rp*i3func)*mask;
190   Uk[0][0]=                   -pi4pow*(zmzp2*i3func+r*rp*i1func);
191   Uk[1][0]=                    pi4pow*(zmzp)*(r*i3func-rp*i2func); /* 48 mults + 21 + 21 = 90 mults and divs */
192 }
193 #else
194 /* integration point functions */
195 /* Evaluates the tensor U=(I-(x-y)(x-y)/(x-y)^2)/|x-y| at point x,y */
196 /* if x==y we will return zero. This is not the correct result */
197 /* since the tensor diverges for x==y but when integrated */
198 /* the divergent part is antisymmetric and vanishes. This is not  */
199 /* trivial, but can be proven. */
200 PETSC_DEVICE_FUNC_DECL void LandauTensor3D(const PetscReal x1[], const PetscReal xp, const PetscReal yp, const PetscReal zp, PetscReal U[][3], PetscReal mask)
201 {
202   PetscReal dx[3],inorm3,inorm,inorm2,norm2,x2[] = {xp,yp,zp};
203   PetscInt  d;
204   for (d = 0, norm2 = PETSC_MACHINE_EPSILON; d < 3; ++d) {
205     dx[d] = x2[d] - x1[d];
206     norm2 += dx[d] * dx[d];
207   }
208   inorm2 = mask/norm2;
209   inorm = LANDAU_SQRT(inorm2);
210   inorm3 = inorm2*inorm;
211   for (d = 0; d < 3; ++d) U[d][d] = -(inorm - inorm3 * dx[d] * dx[d]);
212   U[1][0] = U[0][1] = inorm3 * dx[0] * dx[1];
213   U[1][2] = U[2][1] = inorm3 * dx[2] * dx[1];
214   U[2][0] = U[0][2] = inorm3 * dx[0] * dx[2];
215 }
216 #endif
217