1*e0eea495SMark #define LANDAU_INVSQRT(q) (1./PetscSqrtReal(q)) 2*e0eea495SMark #define LANDAU_SQRT(q) PetscSqrtReal(q) 3*e0eea495SMark 4*e0eea495SMark /* elliptic functions 5*e0eea495SMark */ 6*e0eea495SMark PETSC_DEVICE_FUNC_DECL PetscReal polevl_10(PetscReal x, PetscReal coef[]) 7*e0eea495SMark { 8*e0eea495SMark PetscReal ans; 9*e0eea495SMark PetscInt i; 10*e0eea495SMark ans = coef[0]; 11*e0eea495SMark for (i=1; i<11; i++) ans = ans * x + coef[i]; 12*e0eea495SMark return(ans); 13*e0eea495SMark } 14*e0eea495SMark PETSC_DEVICE_FUNC_DECL PetscReal polevl_9(PetscReal x, PetscReal coef[]) 15*e0eea495SMark { 16*e0eea495SMark PetscReal ans; 17*e0eea495SMark PetscInt i; 18*e0eea495SMark ans = coef[0]; 19*e0eea495SMark for (i=1; i<10; i++) ans = ans * x + coef[i]; 20*e0eea495SMark return(ans); 21*e0eea495SMark } 22*e0eea495SMark /* 23*e0eea495SMark * Complete elliptic integral of the second kind 24*e0eea495SMark */ 25*e0eea495SMark PETSC_DEVICE_FUNC_DECL void ellipticE(PetscReal x,PetscReal *ret) 26*e0eea495SMark { 27*e0eea495SMark #if defined(PETSC_USE_REAL_SINGLE) 28*e0eea495SMark static PetscReal P2[] = { 29*e0eea495SMark 1.53552577301013293365E-4F, 30*e0eea495SMark 2.50888492163602060990E-3F, 31*e0eea495SMark 8.68786816565889628429E-3F, 32*e0eea495SMark 1.07350949056076193403E-2F, 33*e0eea495SMark 7.77395492516787092951E-3F, 34*e0eea495SMark 7.58395289413514708519E-3F, 35*e0eea495SMark 1.15688436810574127319E-2F, 36*e0eea495SMark 2.18317996015557253103E-2F, 37*e0eea495SMark 5.68051945617860553470E-2F, 38*e0eea495SMark 4.43147180560990850618E-1F, 39*e0eea495SMark 1.00000000000000000299E0F 40*e0eea495SMark }; 41*e0eea495SMark static PetscReal Q2[] = { 42*e0eea495SMark 3.27954898576485872656E-5F, 43*e0eea495SMark 1.00962792679356715133E-3F, 44*e0eea495SMark 6.50609489976927491433E-3F, 45*e0eea495SMark 1.68862163993311317300E-2F, 46*e0eea495SMark 2.61769742454493659583E-2F, 47*e0eea495SMark 3.34833904888224918614E-2F, 48*e0eea495SMark 4.27180926518931511717E-2F, 49*e0eea495SMark 5.85936634471101055642E-2F, 50*e0eea495SMark 9.37499997197644278445E-2F, 51*e0eea495SMark 2.49999999999888314361E-1F 52*e0eea495SMark }; 53*e0eea495SMark #else 54*e0eea495SMark static PetscReal P2[] = { 55*e0eea495SMark 1.53552577301013293365E-4, 56*e0eea495SMark 2.50888492163602060990E-3, 57*e0eea495SMark 8.68786816565889628429E-3, 58*e0eea495SMark 1.07350949056076193403E-2, 59*e0eea495SMark 7.77395492516787092951E-3, 60*e0eea495SMark 7.58395289413514708519E-3, 61*e0eea495SMark 1.15688436810574127319E-2, 62*e0eea495SMark 2.18317996015557253103E-2, 63*e0eea495SMark 5.68051945617860553470E-2, 64*e0eea495SMark 4.43147180560990850618E-1, 65*e0eea495SMark 1.00000000000000000299E0 66*e0eea495SMark }; 67*e0eea495SMark static PetscReal Q2[] = { 68*e0eea495SMark 3.27954898576485872656E-5, 69*e0eea495SMark 1.00962792679356715133E-3, 70*e0eea495SMark 6.50609489976927491433E-3, 71*e0eea495SMark 1.68862163993311317300E-2, 72*e0eea495SMark 2.61769742454493659583E-2, 73*e0eea495SMark 3.34833904888224918614E-2, 74*e0eea495SMark 4.27180926518931511717E-2, 75*e0eea495SMark 5.85936634471101055642E-2, 76*e0eea495SMark 9.37499997197644278445E-2, 77*e0eea495SMark 2.49999999999888314361E-1 78*e0eea495SMark }; 79*e0eea495SMark #endif 80*e0eea495SMark x = 1 - x; /* where m = 1 - m1 */ 81*e0eea495SMark *ret = polevl_10(x,P2) - PetscLogReal(x) * (x * polevl_9(x,Q2)); 82*e0eea495SMark } 83*e0eea495SMark /* 84*e0eea495SMark * Complete elliptic integral of the first kind 85*e0eea495SMark */ 86*e0eea495SMark PETSC_DEVICE_FUNC_DECL void ellipticK(PetscReal x,PetscReal *ret) 87*e0eea495SMark { 88*e0eea495SMark #if defined(PETSC_USE_REAL_SINGLE) 89*e0eea495SMark static PetscReal P1[] = 90*e0eea495SMark { 91*e0eea495SMark 1.37982864606273237150E-4F, 92*e0eea495SMark 2.28025724005875567385E-3F, 93*e0eea495SMark 7.97404013220415179367E-3F, 94*e0eea495SMark 9.85821379021226008714E-3F, 95*e0eea495SMark 6.87489687449949877925E-3F, 96*e0eea495SMark 6.18901033637687613229E-3F, 97*e0eea495SMark 8.79078273952743772254E-3F, 98*e0eea495SMark 1.49380448916805252718E-2F, 99*e0eea495SMark 3.08851465246711995998E-2F, 100*e0eea495SMark 9.65735902811690126535E-2F, 101*e0eea495SMark 1.38629436111989062502E0F 102*e0eea495SMark }; 103*e0eea495SMark static PetscReal Q1[] = 104*e0eea495SMark { 105*e0eea495SMark 2.94078955048598507511E-5F, 106*e0eea495SMark 9.14184723865917226571E-4F, 107*e0eea495SMark 5.94058303753167793257E-3F, 108*e0eea495SMark 1.54850516649762399335E-2F, 109*e0eea495SMark 2.39089602715924892727E-2F, 110*e0eea495SMark 3.01204715227604046988E-2F, 111*e0eea495SMark 3.73774314173823228969E-2F, 112*e0eea495SMark 4.88280347570998239232E-2F, 113*e0eea495SMark 7.03124996963957469739E-2F, 114*e0eea495SMark 1.24999999999870820058E-1F, 115*e0eea495SMark 4.99999999999999999821E-1F 116*e0eea495SMark }; 117*e0eea495SMark #else 118*e0eea495SMark static PetscReal P1[] = 119*e0eea495SMark { 120*e0eea495SMark 1.37982864606273237150E-4, 121*e0eea495SMark 2.28025724005875567385E-3, 122*e0eea495SMark 7.97404013220415179367E-3, 123*e0eea495SMark 9.85821379021226008714E-3, 124*e0eea495SMark 6.87489687449949877925E-3, 125*e0eea495SMark 6.18901033637687613229E-3, 126*e0eea495SMark 8.79078273952743772254E-3, 127*e0eea495SMark 1.49380448916805252718E-2, 128*e0eea495SMark 3.08851465246711995998E-2, 129*e0eea495SMark 9.65735902811690126535E-2, 130*e0eea495SMark 1.38629436111989062502E0 131*e0eea495SMark }; 132*e0eea495SMark static PetscReal Q1[] = 133*e0eea495SMark { 134*e0eea495SMark 2.94078955048598507511E-5, 135*e0eea495SMark 9.14184723865917226571E-4, 136*e0eea495SMark 5.94058303753167793257E-3, 137*e0eea495SMark 1.54850516649762399335E-2, 138*e0eea495SMark 2.39089602715924892727E-2, 139*e0eea495SMark 3.01204715227604046988E-2, 140*e0eea495SMark 3.73774314173823228969E-2, 141*e0eea495SMark 4.88280347570998239232E-2, 142*e0eea495SMark 7.03124996963957469739E-2, 143*e0eea495SMark 1.24999999999870820058E-1, 144*e0eea495SMark 4.99999999999999999821E-1 145*e0eea495SMark }; 146*e0eea495SMark #endif 147*e0eea495SMark x = 1 - x; /* where m = 1 - m1 */ 148*e0eea495SMark *ret = polevl_10(x,P1) - PetscLogReal(x) * polevl_10(x,Q1); 149*e0eea495SMark } 150*e0eea495SMark 151*e0eea495SMark 152*e0eea495SMark /* integration point functions */ 153*e0eea495SMark /* Evaluates the tensor U=(I-(x-y)(x-y)/(x-y)^2)/|x-y| at point x,y */ 154*e0eea495SMark /* if x==y we will return zero. This is not the correct result */ 155*e0eea495SMark /* since the tensor diverges for x==y but when integrated */ 156*e0eea495SMark /* the divergent part is antisymmetric and vanishes. This is not */ 157*e0eea495SMark /* trivial, but can be proven. */ 158*e0eea495SMark #if LANDAU_DIM==3 159*e0eea495SMark PETSC_DEVICE_FUNC_DECL void LandauTensor3D(const PetscReal x1[], const PetscReal xp, const PetscReal yp, const PetscReal zp, PetscReal U[][3], PetscReal mask) 160*e0eea495SMark { 161*e0eea495SMark PetscReal dx[3],inorm3,inorm,inorm2,norm2,x2[] = {xp,yp,zp}; 162*e0eea495SMark PetscInt d; 163*e0eea495SMark for (d = 0, norm2 = PETSC_MACHINE_EPSILON; d < 3; ++d) { 164*e0eea495SMark dx[d] = x2[d] - x1[d]; 165*e0eea495SMark norm2 += dx[d] * dx[d]; 166*e0eea495SMark } 167*e0eea495SMark inorm2 = mask/norm2; 168*e0eea495SMark inorm = LANDAU_SQRT(inorm2); 169*e0eea495SMark inorm3 = inorm2*inorm; 170*e0eea495SMark for (d = 0; d < 3; ++d) U[d][d] = inorm - inorm3 * dx[d] * dx[d]; 171*e0eea495SMark U[1][0] = U[0][1] = -inorm3 * dx[0] * dx[1]; 172*e0eea495SMark U[1][2] = U[2][1] = -inorm3 * dx[2] * dx[1]; 173*e0eea495SMark U[2][0] = U[0][2] = -inorm3 * dx[0] * dx[2]; 174*e0eea495SMark } 175*e0eea495SMark #else 176*e0eea495SMark PETSC_DEVICE_FUNC_DECL void LandauTensor2D(const PetscReal x[], const PetscReal rp, const PetscReal zp, PetscReal Ud[][2], PetscReal Uk[][2], const PetscReal mask) 177*e0eea495SMark { 178*e0eea495SMark PetscReal l,s,r=x[0],z=x[1],i1func,i2func,i3func,ks,es,pi4pow,sqrt_1s,r2,rp2,r2prp2,zmzp,zmzp2,tt; 179*e0eea495SMark //PetscReal mask /* = !!(r!=rp || z!=zp) */; 180*e0eea495SMark /* !!(zmzp2 > 1.e-12 || (r-rp) > 1.e-12 || (r-rp) < -1.e-12); */ 181*e0eea495SMark r2=PetscSqr(r); 182*e0eea495SMark zmzp=z-zp; 183*e0eea495SMark rp2=PetscSqr(rp); 184*e0eea495SMark zmzp2=PetscSqr(zmzp); 185*e0eea495SMark r2prp2=r2+rp2; 186*e0eea495SMark l = r2 + rp2 + zmzp2; 187*e0eea495SMark /* if (zmzp2 > PETSC_SMALL) mask = 1; */ 188*e0eea495SMark /* else if ((tt=(r-rp)) > PETSC_SMALL) mask = 1; */ 189*e0eea495SMark /* else if (tt < -PETSC_SMALL) mask = 1; */ 190*e0eea495SMark /* else mask = 0; */ 191*e0eea495SMark s = mask*2*r*rp/l; /* mask for vectorization */ 192*e0eea495SMark tt = 1./(1+s); 193*e0eea495SMark pi4pow = 4*PETSC_PI*LANDAU_INVSQRT(PetscSqr(l)*l); 194*e0eea495SMark sqrt_1s = LANDAU_SQRT(1.+s); 195*e0eea495SMark /* sp.ellipe(2.*s/(1.+s)) */ 196*e0eea495SMark ellipticE(2*s*tt,&es); /* 44 flops * 2 + 75 = 163 flops including 2 logs, 1 sqrt, 1 pow, 21 mult */ 197*e0eea495SMark /* sp.ellipk(2.*s/(1.+s)) */ 198*e0eea495SMark ellipticK(2*s*tt,&ks); /* 44 flops + 75 in rest, 21 mult */ 199*e0eea495SMark /* mask is needed here just for single precision */ 200*e0eea495SMark i2func = 2./((1-s)*sqrt_1s) * es; 201*e0eea495SMark i1func = 4./(PetscSqr(s)*sqrt_1s + PETSC_MACHINE_EPSILON) * mask * (ks - (1.+s) * es); 202*e0eea495SMark i3func = 2./((1-s)*(s)*sqrt_1s + PETSC_MACHINE_EPSILON) * (es - (1-s) * ks); 203*e0eea495SMark Ud[0][0]= pi4pow*(rp2*i1func+PetscSqr(zmzp)*i2func); 204*e0eea495SMark Ud[0][1]=Ud[1][0]=Uk[0][1]= -pi4pow*(zmzp)*(r*i2func-rp*i3func); 205*e0eea495SMark Uk[1][1]=Ud[1][1]= pi4pow*((r2prp2)*i2func-2*r*rp*i3func)*mask; 206*e0eea495SMark Uk[0][0]= pi4pow*(zmzp2*i3func+r*rp*i1func); 207*e0eea495SMark Uk[1][0]= -pi4pow*(zmzp)*(r*i3func-rp*i2func); /* 48 mults + 21 + 21 = 90 mults and divs */ 208*e0eea495SMark } 209*e0eea495SMark #endif 210