xref: /petsc/src/ts/utils/dmplexlandau/land_tensors.h (revision cefb98e87952b36bfd83b9ff5af63d155f2c71df)
1e0eea495SMark #define LANDAU_INVSQRT(q) (1./PetscSqrtReal(q))
2e0eea495SMark 
3042217e8SBarry Smith #if defined(__CUDA_ARCH__)
4042217e8SBarry Smith #define PETSC_DEVICE_FUNC_DECL __device__
5042217e8SBarry Smith #elif defined(KOKKOS_INLINE_FUNCTION)
6042217e8SBarry Smith #define PETSC_DEVICE_FUNC_DECL KOKKOS_INLINE_FUNCTION
7042217e8SBarry Smith #else
8042217e8SBarry Smith #define PETSC_DEVICE_FUNC_DECL static
9042217e8SBarry Smith #endif
10042217e8SBarry Smith 
1152cdd6eaSMark #if LANDAU_DIM==2
12e0eea495SMark /* elliptic functions
13e0eea495SMark  */
1452cdd6eaSMark PETSC_DEVICE_FUNC_DECL PetscReal polevl_10(PetscReal x, const PetscReal coef[])
15e0eea495SMark {
16e0eea495SMark   PetscReal ans;
17e0eea495SMark   PetscInt  i;
18e0eea495SMark   ans = coef[0];
19e0eea495SMark   for (i=1; i<11; i++) ans = ans * x + coef[i];
20e0eea495SMark   return(ans);
21e0eea495SMark }
2252cdd6eaSMark PETSC_DEVICE_FUNC_DECL PetscReal polevl_9(PetscReal x, const PetscReal coef[])
23e0eea495SMark {
24e0eea495SMark   PetscReal ans;
25e0eea495SMark   PetscInt  i;
26e0eea495SMark   ans = coef[0];
27e0eea495SMark   for (i=1; i<10; i++) ans = ans * x + coef[i];
28e0eea495SMark   return(ans);
29e0eea495SMark }
30e0eea495SMark /*
31e0eea495SMark  *      Complete elliptic integral of the second kind
32e0eea495SMark  */
33e0eea495SMark PETSC_DEVICE_FUNC_DECL void ellipticE(PetscReal x,PetscReal *ret)
34e0eea495SMark {
35e0eea495SMark #if defined(PETSC_USE_REAL_SINGLE)
3652cdd6eaSMark   static const PetscReal P2[] = {
37e0eea495SMark     1.53552577301013293365E-4F,
38e0eea495SMark     2.50888492163602060990E-3F,
39e0eea495SMark     8.68786816565889628429E-3F,
40e0eea495SMark     1.07350949056076193403E-2F,
41e0eea495SMark     7.77395492516787092951E-3F,
42e0eea495SMark     7.58395289413514708519E-3F,
43e0eea495SMark     1.15688436810574127319E-2F,
44e0eea495SMark     2.18317996015557253103E-2F,
45e0eea495SMark     5.68051945617860553470E-2F,
46e0eea495SMark     4.43147180560990850618E-1F,
47e0eea495SMark     1.00000000000000000299E0F
48e0eea495SMark   };
4952cdd6eaSMark   static const PetscReal Q2[] = {
50e0eea495SMark     3.27954898576485872656E-5F,
51e0eea495SMark     1.00962792679356715133E-3F,
52e0eea495SMark     6.50609489976927491433E-3F,
53e0eea495SMark     1.68862163993311317300E-2F,
54e0eea495SMark     2.61769742454493659583E-2F,
55e0eea495SMark     3.34833904888224918614E-2F,
56e0eea495SMark     4.27180926518931511717E-2F,
57e0eea495SMark     5.85936634471101055642E-2F,
58e0eea495SMark     9.37499997197644278445E-2F,
59e0eea495SMark     2.49999999999888314361E-1F
60e0eea495SMark   };
61e0eea495SMark #else
6252cdd6eaSMark   static const PetscReal P2[] = {
63e0eea495SMark     1.53552577301013293365E-4,
64e0eea495SMark     2.50888492163602060990E-3,
65e0eea495SMark     8.68786816565889628429E-3,
66e0eea495SMark     1.07350949056076193403E-2,
67e0eea495SMark     7.77395492516787092951E-3,
68e0eea495SMark     7.58395289413514708519E-3,
69e0eea495SMark     1.15688436810574127319E-2,
70e0eea495SMark     2.18317996015557253103E-2,
71e0eea495SMark     5.68051945617860553470E-2,
72e0eea495SMark     4.43147180560990850618E-1,
73e0eea495SMark     1.00000000000000000299E0
74e0eea495SMark   };
7552cdd6eaSMark   static const PetscReal Q2[] = {
76e0eea495SMark     3.27954898576485872656E-5,
77e0eea495SMark     1.00962792679356715133E-3,
78e0eea495SMark     6.50609489976927491433E-3,
79e0eea495SMark     1.68862163993311317300E-2,
80e0eea495SMark     2.61769742454493659583E-2,
81e0eea495SMark     3.34833904888224918614E-2,
82e0eea495SMark     4.27180926518931511717E-2,
83e0eea495SMark     5.85936634471101055642E-2,
84e0eea495SMark     9.37499997197644278445E-2,
85e0eea495SMark     2.49999999999888314361E-1
86e0eea495SMark   };
87e0eea495SMark #endif
88e0eea495SMark   x = 1 - x; /* where m = 1 - m1 */
89e0eea495SMark   *ret = polevl_10(x,P2) - PetscLogReal(x) * (x * polevl_9(x,Q2));
90e0eea495SMark }
91e0eea495SMark /*
92e0eea495SMark  *      Complete elliptic integral of the first kind
93e0eea495SMark  */
94e0eea495SMark PETSC_DEVICE_FUNC_DECL void ellipticK(PetscReal x,PetscReal *ret)
95e0eea495SMark {
96e0eea495SMark #if defined(PETSC_USE_REAL_SINGLE)
9752cdd6eaSMark   static const PetscReal P1[] =
98e0eea495SMark     {
99e0eea495SMark       1.37982864606273237150E-4F,
100e0eea495SMark       2.28025724005875567385E-3F,
101e0eea495SMark       7.97404013220415179367E-3F,
102e0eea495SMark       9.85821379021226008714E-3F,
103e0eea495SMark       6.87489687449949877925E-3F,
104e0eea495SMark       6.18901033637687613229E-3F,
105e0eea495SMark       8.79078273952743772254E-3F,
106e0eea495SMark       1.49380448916805252718E-2F,
107e0eea495SMark       3.08851465246711995998E-2F,
108e0eea495SMark       9.65735902811690126535E-2F,
109e0eea495SMark       1.38629436111989062502E0F
110e0eea495SMark     };
11152cdd6eaSMark   static const PetscReal Q1[] =
112e0eea495SMark     {
113e0eea495SMark       2.94078955048598507511E-5F,
114e0eea495SMark       9.14184723865917226571E-4F,
115e0eea495SMark       5.94058303753167793257E-3F,
116e0eea495SMark       1.54850516649762399335E-2F,
117e0eea495SMark       2.39089602715924892727E-2F,
118e0eea495SMark       3.01204715227604046988E-2F,
119e0eea495SMark       3.73774314173823228969E-2F,
120e0eea495SMark       4.88280347570998239232E-2F,
121e0eea495SMark       7.03124996963957469739E-2F,
122e0eea495SMark       1.24999999999870820058E-1F,
123e0eea495SMark       4.99999999999999999821E-1F
124e0eea495SMark     };
125e0eea495SMark #else
12652cdd6eaSMark   static const PetscReal P1[] =
127e0eea495SMark     {
128e0eea495SMark       1.37982864606273237150E-4,
129e0eea495SMark       2.28025724005875567385E-3,
130e0eea495SMark       7.97404013220415179367E-3,
131e0eea495SMark       9.85821379021226008714E-3,
132e0eea495SMark       6.87489687449949877925E-3,
133e0eea495SMark       6.18901033637687613229E-3,
134e0eea495SMark       8.79078273952743772254E-3,
135e0eea495SMark       1.49380448916805252718E-2,
136e0eea495SMark       3.08851465246711995998E-2,
137e0eea495SMark       9.65735902811690126535E-2,
138e0eea495SMark       1.38629436111989062502E0
139e0eea495SMark     };
14052cdd6eaSMark   static const PetscReal Q1[] =
141e0eea495SMark     {
142e0eea495SMark       2.94078955048598507511E-5,
143e0eea495SMark       9.14184723865917226571E-4,
144e0eea495SMark       5.94058303753167793257E-3,
145e0eea495SMark       1.54850516649762399335E-2,
146e0eea495SMark       2.39089602715924892727E-2,
147e0eea495SMark       3.01204715227604046988E-2,
148e0eea495SMark       3.73774314173823228969E-2,
149e0eea495SMark       4.88280347570998239232E-2,
150e0eea495SMark       7.03124996963957469739E-2,
151e0eea495SMark       1.24999999999870820058E-1,
152e0eea495SMark       4.99999999999999999821E-1
153e0eea495SMark     };
154e0eea495SMark #endif
155e0eea495SMark   x = 1 - x; /* where m = 1 - m1 */
156e0eea495SMark   *ret = polevl_10(x,P1) - PetscLogReal(x) * polevl_10(x,Q1);
157e0eea495SMark }
158a587d139SMark /* flip sign. papers use du/dt = C, PETSc uses form G(u) = du/dt - C(u) = 0 */
159e0eea495SMark PETSC_DEVICE_FUNC_DECL void LandauTensor2D(const PetscReal x[], const PetscReal rp, const PetscReal zp, PetscReal Ud[][2], PetscReal Uk[][2], const PetscReal mask)
160e0eea495SMark {
161e0eea495SMark   PetscReal l,s,r=x[0],z=x[1],i1func,i2func,i3func,ks,es,pi4pow,sqrt_1s,r2,rp2,r2prp2,zmzp,zmzp2,tt;
162e0eea495SMark   //PetscReal mask /* = !!(r!=rp || z!=zp) */;
163e0eea495SMark   /* !!(zmzp2 > 1.e-12 || (r-rp) >  1.e-12 || (r-rp) < -1.e-12); */
164e0eea495SMark   r2=PetscSqr(r);
165e0eea495SMark   zmzp=z-zp;
166e0eea495SMark   rp2=PetscSqr(rp);
167e0eea495SMark   zmzp2=PetscSqr(zmzp);
168e0eea495SMark   r2prp2=r2+rp2;
169e0eea495SMark   l = r2 + rp2 + zmzp2;
170e0eea495SMark   /* if      (zmzp2 >  PETSC_SMALL) mask = 1; */
171e0eea495SMark   /* else if ((tt=(r-rp)) >  PETSC_SMALL) mask = 1; */
172e0eea495SMark   /* else if  (tt         < -PETSC_SMALL) mask = 1; */
173e0eea495SMark   /* else mask = 0; */
174e0eea495SMark   s = mask*2*r*rp/l; /* mask for vectorization */
175e0eea495SMark   tt = 1./(1+s);
176e0eea495SMark   pi4pow = 4*PETSC_PI*LANDAU_INVSQRT(PetscSqr(l)*l);
177*cefb98e8SMark Adams   sqrt_1s = PetscSqrtReal(1.+s);
178e0eea495SMark   /* sp.ellipe(2.*s/(1.+s)) */
179e0eea495SMark   ellipticE(2*s*tt,&es); /* 44 flops * 2 + 75 = 163 flops including 2 logs, 1 sqrt, 1 pow, 21 mult */
180e0eea495SMark   /* sp.ellipk(2.*s/(1.+s)) */
181e0eea495SMark   ellipticK(2*s*tt,&ks); /* 44 flops + 75 in rest, 21 mult */
182e0eea495SMark   /* mask is needed here just for single precision */
183e0eea495SMark   i2func = 2./((1-s)*sqrt_1s) * es;
184e0eea495SMark   i1func = 4./(PetscSqr(s)*sqrt_1s + PETSC_MACHINE_EPSILON) * mask * (ks - (1.+s) * es);
185e0eea495SMark   i3func = 2./((1-s)*(s)*sqrt_1s + PETSC_MACHINE_EPSILON) * (es - (1-s) * ks);
186a587d139SMark   Ud[0][0]=                   -pi4pow*(rp2*i1func+PetscSqr(zmzp)*i2func);
187a587d139SMark   Ud[0][1]=Ud[1][0]=Uk[0][1]=  pi4pow*(zmzp)*(r*i2func-rp*i3func);
188a587d139SMark   Uk[1][1]=Ud[1][1]=          -pi4pow*((r2prp2)*i2func-2*r*rp*i3func)*mask;
189a587d139SMark   Uk[0][0]=                   -pi4pow*(zmzp2*i3func+r*rp*i1func);
190a587d139SMark   Uk[1][0]=                    pi4pow*(zmzp)*(r*i3func-rp*i2func); /* 48 mults + 21 + 21 = 90 mults and divs */
191e0eea495SMark }
19252cdd6eaSMark #else
19352cdd6eaSMark /* integration point functions */
19452cdd6eaSMark /* Evaluates the tensor U=(I-(x-y)(x-y)/(x-y)^2)/|x-y| at point x,y */
19552cdd6eaSMark /* if x==y we will return zero. This is not the correct result */
19652cdd6eaSMark /* since the tensor diverges for x==y but when integrated */
19752cdd6eaSMark /* the divergent part is antisymmetric and vanishes. This is not  */
19852cdd6eaSMark /* trivial, but can be proven. */
19952cdd6eaSMark PETSC_DEVICE_FUNC_DECL void LandauTensor3D(const PetscReal x1[], const PetscReal xp, const PetscReal yp, const PetscReal zp, PetscReal U[][3], PetscReal mask)
20052cdd6eaSMark {
20152cdd6eaSMark   PetscReal dx[3],inorm3,inorm,inorm2,norm2,x2[] = {xp,yp,zp};
20252cdd6eaSMark   PetscInt  d;
20352cdd6eaSMark   for (d = 0, norm2 = PETSC_MACHINE_EPSILON; d < 3; ++d) {
20452cdd6eaSMark     dx[d] = x2[d] - x1[d];
20552cdd6eaSMark     norm2 += dx[d] * dx[d];
20652cdd6eaSMark   }
20752cdd6eaSMark   inorm2 = mask/norm2;
208*cefb98e8SMark Adams   inorm = PetscSqrtReal(inorm2);
20952cdd6eaSMark   inorm3 = inorm2*inorm;
210bcff154bSMark Adams   for (d = 0; d < 3; ++d) U[d][d] = -(inorm - inorm3 * dx[d] * dx[d]);
211a587d139SMark   U[1][0] = U[0][1] = inorm3 * dx[0] * dx[1];
212a587d139SMark   U[1][2] = U[2][1] = inorm3 * dx[2] * dx[1];
213a587d139SMark   U[2][0] = U[0][2] = inorm3 * dx[0] * dx[2];
21452cdd6eaSMark }
215*cefb98e8SMark Adams /* Relativistic form */
216*cefb98e8SMark Adams #define GAMMA3(_x,_c02) PetscSqrtReal(1.0 + ((_x[0]*_x[0]) + (_x[1]*_x[1]) + (_x[2]*_x[2]))/(_c02))
217*cefb98e8SMark Adams PETSC_DEVICE_FUNC_DECL void LandauTensor3DRelativistic(const PetscReal a_x1[], const PetscReal xp, const PetscReal yp, const PetscReal zp, PetscReal U[][3], PetscReal mask, PetscReal c0)
218*cefb98e8SMark Adams {
219*cefb98e8SMark Adams   const PetscReal x2[3] = {xp,yp,zp}, x1[3] = {a_x1[0],a_x1[1],a_x1[2]}, c02 = c0*c0, g1 = GAMMA3(x1,c02), g2 = GAMMA3(x2,c02), g1_eps = g1 - 1., g2_eps = g2 - 1., gg_eps = g1_eps + g2_eps + g1_eps*g2_eps;
220*cefb98e8SMark Adams   PetscReal       fact, u1u2, diff[3], udiff2,u12,u22,wsq,rsq, tt;
221*cefb98e8SMark Adams   PetscInt        i,j;
222*cefb98e8SMark Adams 
223*cefb98e8SMark Adams   if (mask==0.0) {
224*cefb98e8SMark Adams     for (i = 0; i < 3; ++i) {
225*cefb98e8SMark Adams       for (j = 0; j < 3; ++j) {
226*cefb98e8SMark Adams         U[i][j] = 0;
227*cefb98e8SMark Adams       }
228*cefb98e8SMark Adams     }
229*cefb98e8SMark Adams   } else {
230*cefb98e8SMark Adams     for (i = 0, u1u2 = u12 = u22 = udiff2 = 0; i < 3; ++i) {
231*cefb98e8SMark Adams       diff[i] = x1[i] - x2[i];
232*cefb98e8SMark Adams       udiff2 += diff[i] * diff[i];
233*cefb98e8SMark Adams       u12 += x1[i]*x1[i];
234*cefb98e8SMark Adams       u22 += x2[i]*x2[i];
235*cefb98e8SMark Adams       u1u2 += x1[i]*x2[i];
236*cefb98e8SMark Adams     }
237*cefb98e8SMark Adams     tt = 2.*u1u2*(1.-g1*g2) + (u12*u22 + u1u2*u1u2)/c02; // these two terms are about the same with opposite sign
238*cefb98e8SMark Adams     wsq = udiff2 + tt;
239*cefb98e8SMark Adams     //wsq = udiff2 + 2.*u1u2*(1.-g1*g2) + (u12*u22 + u1u2*u1u2)/c02;
240*cefb98e8SMark Adams     rsq = 1.+wsq/c02;
241*cefb98e8SMark Adams     fact = -rsq/(g1*g2*PetscSqrtReal(wsq)); /* flip sign. papers use du/dt = C, PETSc uses form G(u) = du/dt - C(u) = 0 */
242*cefb98e8SMark Adams     for (i = 0; i < 3; ++i) {
243*cefb98e8SMark Adams       for (j = 0; j < 3; ++j) {
244*cefb98e8SMark Adams         U[i][j] = fact * ( -diff[i]*diff[j]/wsq + (PetscSqrtReal(rsq)-1.)*(x1[i]*x2[j] + x1[j]*x2[i])/wsq);
245*cefb98e8SMark Adams       }
246*cefb98e8SMark Adams       U[i][i] += fact;
247*cefb98e8SMark Adams     }
248*cefb98e8SMark Adams #if defined(PETSC_USE_DEBUG)
249*cefb98e8SMark Adams     {
250*cefb98e8SMark Adams       PetscReal diff_g[3], udiff = sqrt(udiff2), err, err2;
251*cefb98e8SMark Adams       for (i = 0; i < 3; ++i) diff_g[i] = x1[i]/g1 - x2[i]/g2;
252*cefb98e8SMark Adams       for (i = 0, err = 0; i < 3; ++i) {
253*cefb98e8SMark Adams         double tmp=0;
254*cefb98e8SMark Adams         for (j = 0; j < 3; ++j) {
255*cefb98e8SMark Adams           tmp += U[i][j]*diff_g[j];
256*cefb98e8SMark Adams         }
257*cefb98e8SMark Adams         err += tmp * tmp;
258*cefb98e8SMark Adams       }
259*cefb98e8SMark Adams       err = sqrt(err);
260*cefb98e8SMark Adams       err2 = udiff2*(err)/(g1*g2);
261*cefb98e8SMark Adams #if defined(PETSC_USE_REAL_SINGLE)
262*cefb98e8SMark Adams       if (err>1.e-6 || err!=err) exit(11);
263*cefb98e8SMark Adams #else
264*cefb98e8SMark Adams       if (err>1.e-13 || err!=err) exit(12);
265*cefb98e8SMark Adams #endif
266*cefb98e8SMark Adams     }
267*cefb98e8SMark Adams #endif
268*cefb98e8SMark Adams   }
269*cefb98e8SMark Adams }
270*cefb98e8SMark Adams 
271e0eea495SMark #endif
272