1e0eea495SMark #define LANDAU_INVSQRT(q) (1./PetscSqrtReal(q)) 2e0eea495SMark 3042217e8SBarry Smith #if defined(__CUDA_ARCH__) 4042217e8SBarry Smith #define PETSC_DEVICE_FUNC_DECL __device__ 5042217e8SBarry Smith #elif defined(KOKKOS_INLINE_FUNCTION) 6042217e8SBarry Smith #define PETSC_DEVICE_FUNC_DECL KOKKOS_INLINE_FUNCTION 7042217e8SBarry Smith #else 8042217e8SBarry Smith #define PETSC_DEVICE_FUNC_DECL static 9042217e8SBarry Smith #endif 10042217e8SBarry Smith 1152cdd6eaSMark #if LANDAU_DIM==2 12e0eea495SMark /* elliptic functions 13e0eea495SMark */ 1452cdd6eaSMark PETSC_DEVICE_FUNC_DECL PetscReal polevl_10(PetscReal x, const PetscReal coef[]) 15e0eea495SMark { 16e0eea495SMark PetscReal ans; 17e0eea495SMark PetscInt i; 18e0eea495SMark ans = coef[0]; 19e0eea495SMark for (i=1; i<11; i++) ans = ans * x + coef[i]; 20e0eea495SMark return(ans); 21e0eea495SMark } 2252cdd6eaSMark PETSC_DEVICE_FUNC_DECL PetscReal polevl_9(PetscReal x, const PetscReal coef[]) 23e0eea495SMark { 24e0eea495SMark PetscReal ans; 25e0eea495SMark PetscInt i; 26e0eea495SMark ans = coef[0]; 27e0eea495SMark for (i=1; i<10; i++) ans = ans * x + coef[i]; 28e0eea495SMark return(ans); 29e0eea495SMark } 30e0eea495SMark /* 31e0eea495SMark * Complete elliptic integral of the second kind 32e0eea495SMark */ 33e0eea495SMark PETSC_DEVICE_FUNC_DECL void ellipticE(PetscReal x,PetscReal *ret) 34e0eea495SMark { 35e0eea495SMark #if defined(PETSC_USE_REAL_SINGLE) 3652cdd6eaSMark static const PetscReal P2[] = { 37e0eea495SMark 1.53552577301013293365E-4F, 38e0eea495SMark 2.50888492163602060990E-3F, 39e0eea495SMark 8.68786816565889628429E-3F, 40e0eea495SMark 1.07350949056076193403E-2F, 41e0eea495SMark 7.77395492516787092951E-3F, 42e0eea495SMark 7.58395289413514708519E-3F, 43e0eea495SMark 1.15688436810574127319E-2F, 44e0eea495SMark 2.18317996015557253103E-2F, 45e0eea495SMark 5.68051945617860553470E-2F, 46e0eea495SMark 4.43147180560990850618E-1F, 47e0eea495SMark 1.00000000000000000299E0F 48e0eea495SMark }; 4952cdd6eaSMark static const PetscReal Q2[] = { 50e0eea495SMark 3.27954898576485872656E-5F, 51e0eea495SMark 1.00962792679356715133E-3F, 52e0eea495SMark 6.50609489976927491433E-3F, 53e0eea495SMark 1.68862163993311317300E-2F, 54e0eea495SMark 2.61769742454493659583E-2F, 55e0eea495SMark 3.34833904888224918614E-2F, 56e0eea495SMark 4.27180926518931511717E-2F, 57e0eea495SMark 5.85936634471101055642E-2F, 58e0eea495SMark 9.37499997197644278445E-2F, 59e0eea495SMark 2.49999999999888314361E-1F 60e0eea495SMark }; 61e0eea495SMark #else 6252cdd6eaSMark static const PetscReal P2[] = { 63e0eea495SMark 1.53552577301013293365E-4, 64e0eea495SMark 2.50888492163602060990E-3, 65e0eea495SMark 8.68786816565889628429E-3, 66e0eea495SMark 1.07350949056076193403E-2, 67e0eea495SMark 7.77395492516787092951E-3, 68e0eea495SMark 7.58395289413514708519E-3, 69e0eea495SMark 1.15688436810574127319E-2, 70e0eea495SMark 2.18317996015557253103E-2, 71e0eea495SMark 5.68051945617860553470E-2, 72e0eea495SMark 4.43147180560990850618E-1, 73e0eea495SMark 1.00000000000000000299E0 74e0eea495SMark }; 7552cdd6eaSMark static const PetscReal Q2[] = { 76e0eea495SMark 3.27954898576485872656E-5, 77e0eea495SMark 1.00962792679356715133E-3, 78e0eea495SMark 6.50609489976927491433E-3, 79e0eea495SMark 1.68862163993311317300E-2, 80e0eea495SMark 2.61769742454493659583E-2, 81e0eea495SMark 3.34833904888224918614E-2, 82e0eea495SMark 4.27180926518931511717E-2, 83e0eea495SMark 5.85936634471101055642E-2, 84e0eea495SMark 9.37499997197644278445E-2, 85e0eea495SMark 2.49999999999888314361E-1 86e0eea495SMark }; 87e0eea495SMark #endif 88e0eea495SMark x = 1 - x; /* where m = 1 - m1 */ 89e0eea495SMark *ret = polevl_10(x,P2) - PetscLogReal(x) * (x * polevl_9(x,Q2)); 90e0eea495SMark } 91e0eea495SMark /* 92e0eea495SMark * Complete elliptic integral of the first kind 93e0eea495SMark */ 94e0eea495SMark PETSC_DEVICE_FUNC_DECL void ellipticK(PetscReal x,PetscReal *ret) 95e0eea495SMark { 96e0eea495SMark #if defined(PETSC_USE_REAL_SINGLE) 9752cdd6eaSMark static const PetscReal P1[] = 98e0eea495SMark { 99e0eea495SMark 1.37982864606273237150E-4F, 100e0eea495SMark 2.28025724005875567385E-3F, 101e0eea495SMark 7.97404013220415179367E-3F, 102e0eea495SMark 9.85821379021226008714E-3F, 103e0eea495SMark 6.87489687449949877925E-3F, 104e0eea495SMark 6.18901033637687613229E-3F, 105e0eea495SMark 8.79078273952743772254E-3F, 106e0eea495SMark 1.49380448916805252718E-2F, 107e0eea495SMark 3.08851465246711995998E-2F, 108e0eea495SMark 9.65735902811690126535E-2F, 109e0eea495SMark 1.38629436111989062502E0F 110e0eea495SMark }; 11152cdd6eaSMark static const PetscReal Q1[] = 112e0eea495SMark { 113e0eea495SMark 2.94078955048598507511E-5F, 114e0eea495SMark 9.14184723865917226571E-4F, 115e0eea495SMark 5.94058303753167793257E-3F, 116e0eea495SMark 1.54850516649762399335E-2F, 117e0eea495SMark 2.39089602715924892727E-2F, 118e0eea495SMark 3.01204715227604046988E-2F, 119e0eea495SMark 3.73774314173823228969E-2F, 120e0eea495SMark 4.88280347570998239232E-2F, 121e0eea495SMark 7.03124996963957469739E-2F, 122e0eea495SMark 1.24999999999870820058E-1F, 123e0eea495SMark 4.99999999999999999821E-1F 124e0eea495SMark }; 125e0eea495SMark #else 12652cdd6eaSMark static const PetscReal P1[] = 127e0eea495SMark { 128e0eea495SMark 1.37982864606273237150E-4, 129e0eea495SMark 2.28025724005875567385E-3, 130e0eea495SMark 7.97404013220415179367E-3, 131e0eea495SMark 9.85821379021226008714E-3, 132e0eea495SMark 6.87489687449949877925E-3, 133e0eea495SMark 6.18901033637687613229E-3, 134e0eea495SMark 8.79078273952743772254E-3, 135e0eea495SMark 1.49380448916805252718E-2, 136e0eea495SMark 3.08851465246711995998E-2, 137e0eea495SMark 9.65735902811690126535E-2, 138e0eea495SMark 1.38629436111989062502E0 139e0eea495SMark }; 14052cdd6eaSMark static const PetscReal Q1[] = 141e0eea495SMark { 142e0eea495SMark 2.94078955048598507511E-5, 143e0eea495SMark 9.14184723865917226571E-4, 144e0eea495SMark 5.94058303753167793257E-3, 145e0eea495SMark 1.54850516649762399335E-2, 146e0eea495SMark 2.39089602715924892727E-2, 147e0eea495SMark 3.01204715227604046988E-2, 148e0eea495SMark 3.73774314173823228969E-2, 149e0eea495SMark 4.88280347570998239232E-2, 150e0eea495SMark 7.03124996963957469739E-2, 151e0eea495SMark 1.24999999999870820058E-1, 152e0eea495SMark 4.99999999999999999821E-1 153e0eea495SMark }; 154e0eea495SMark #endif 155e0eea495SMark x = 1 - x; /* where m = 1 - m1 */ 156e0eea495SMark *ret = polevl_10(x,P1) - PetscLogReal(x) * polevl_10(x,Q1); 157e0eea495SMark } 158a587d139SMark /* flip sign. papers use du/dt = C, PETSc uses form G(u) = du/dt - C(u) = 0 */ 159e0eea495SMark PETSC_DEVICE_FUNC_DECL void LandauTensor2D(const PetscReal x[], const PetscReal rp, const PetscReal zp, PetscReal Ud[][2], PetscReal Uk[][2], const PetscReal mask) 160e0eea495SMark { 161e0eea495SMark PetscReal l,s,r=x[0],z=x[1],i1func,i2func,i3func,ks,es,pi4pow,sqrt_1s,r2,rp2,r2prp2,zmzp,zmzp2,tt; 162e0eea495SMark //PetscReal mask /* = !!(r!=rp || z!=zp) */; 163e0eea495SMark /* !!(zmzp2 > 1.e-12 || (r-rp) > 1.e-12 || (r-rp) < -1.e-12); */ 164e0eea495SMark r2=PetscSqr(r); 165e0eea495SMark zmzp=z-zp; 166e0eea495SMark rp2=PetscSqr(rp); 167e0eea495SMark zmzp2=PetscSqr(zmzp); 168e0eea495SMark r2prp2=r2+rp2; 169e0eea495SMark l = r2 + rp2 + zmzp2; 170e0eea495SMark /* if (zmzp2 > PETSC_SMALL) mask = 1; */ 171e0eea495SMark /* else if ((tt=(r-rp)) > PETSC_SMALL) mask = 1; */ 172e0eea495SMark /* else if (tt < -PETSC_SMALL) mask = 1; */ 173e0eea495SMark /* else mask = 0; */ 174e0eea495SMark s = mask*2*r*rp/l; /* mask for vectorization */ 175e0eea495SMark tt = 1./(1+s); 176e0eea495SMark pi4pow = 4*PETSC_PI*LANDAU_INVSQRT(PetscSqr(l)*l); 177*cefb98e8SMark Adams sqrt_1s = PetscSqrtReal(1.+s); 178e0eea495SMark /* sp.ellipe(2.*s/(1.+s)) */ 179e0eea495SMark ellipticE(2*s*tt,&es); /* 44 flops * 2 + 75 = 163 flops including 2 logs, 1 sqrt, 1 pow, 21 mult */ 180e0eea495SMark /* sp.ellipk(2.*s/(1.+s)) */ 181e0eea495SMark ellipticK(2*s*tt,&ks); /* 44 flops + 75 in rest, 21 mult */ 182e0eea495SMark /* mask is needed here just for single precision */ 183e0eea495SMark i2func = 2./((1-s)*sqrt_1s) * es; 184e0eea495SMark i1func = 4./(PetscSqr(s)*sqrt_1s + PETSC_MACHINE_EPSILON) * mask * (ks - (1.+s) * es); 185e0eea495SMark i3func = 2./((1-s)*(s)*sqrt_1s + PETSC_MACHINE_EPSILON) * (es - (1-s) * ks); 186a587d139SMark Ud[0][0]= -pi4pow*(rp2*i1func+PetscSqr(zmzp)*i2func); 187a587d139SMark Ud[0][1]=Ud[1][0]=Uk[0][1]= pi4pow*(zmzp)*(r*i2func-rp*i3func); 188a587d139SMark Uk[1][1]=Ud[1][1]= -pi4pow*((r2prp2)*i2func-2*r*rp*i3func)*mask; 189a587d139SMark Uk[0][0]= -pi4pow*(zmzp2*i3func+r*rp*i1func); 190a587d139SMark Uk[1][0]= pi4pow*(zmzp)*(r*i3func-rp*i2func); /* 48 mults + 21 + 21 = 90 mults and divs */ 191e0eea495SMark } 19252cdd6eaSMark #else 19352cdd6eaSMark /* integration point functions */ 19452cdd6eaSMark /* Evaluates the tensor U=(I-(x-y)(x-y)/(x-y)^2)/|x-y| at point x,y */ 19552cdd6eaSMark /* if x==y we will return zero. This is not the correct result */ 19652cdd6eaSMark /* since the tensor diverges for x==y but when integrated */ 19752cdd6eaSMark /* the divergent part is antisymmetric and vanishes. This is not */ 19852cdd6eaSMark /* trivial, but can be proven. */ 19952cdd6eaSMark PETSC_DEVICE_FUNC_DECL void LandauTensor3D(const PetscReal x1[], const PetscReal xp, const PetscReal yp, const PetscReal zp, PetscReal U[][3], PetscReal mask) 20052cdd6eaSMark { 20152cdd6eaSMark PetscReal dx[3],inorm3,inorm,inorm2,norm2,x2[] = {xp,yp,zp}; 20252cdd6eaSMark PetscInt d; 20352cdd6eaSMark for (d = 0, norm2 = PETSC_MACHINE_EPSILON; d < 3; ++d) { 20452cdd6eaSMark dx[d] = x2[d] - x1[d]; 20552cdd6eaSMark norm2 += dx[d] * dx[d]; 20652cdd6eaSMark } 20752cdd6eaSMark inorm2 = mask/norm2; 208*cefb98e8SMark Adams inorm = PetscSqrtReal(inorm2); 20952cdd6eaSMark inorm3 = inorm2*inorm; 210bcff154bSMark Adams for (d = 0; d < 3; ++d) U[d][d] = -(inorm - inorm3 * dx[d] * dx[d]); 211a587d139SMark U[1][0] = U[0][1] = inorm3 * dx[0] * dx[1]; 212a587d139SMark U[1][2] = U[2][1] = inorm3 * dx[2] * dx[1]; 213a587d139SMark U[2][0] = U[0][2] = inorm3 * dx[0] * dx[2]; 21452cdd6eaSMark } 215*cefb98e8SMark Adams /* Relativistic form */ 216*cefb98e8SMark Adams #define GAMMA3(_x,_c02) PetscSqrtReal(1.0 + ((_x[0]*_x[0]) + (_x[1]*_x[1]) + (_x[2]*_x[2]))/(_c02)) 217*cefb98e8SMark Adams PETSC_DEVICE_FUNC_DECL void LandauTensor3DRelativistic(const PetscReal a_x1[], const PetscReal xp, const PetscReal yp, const PetscReal zp, PetscReal U[][3], PetscReal mask, PetscReal c0) 218*cefb98e8SMark Adams { 219*cefb98e8SMark Adams const PetscReal x2[3] = {xp,yp,zp}, x1[3] = {a_x1[0],a_x1[1],a_x1[2]}, c02 = c0*c0, g1 = GAMMA3(x1,c02), g2 = GAMMA3(x2,c02), g1_eps = g1 - 1., g2_eps = g2 - 1., gg_eps = g1_eps + g2_eps + g1_eps*g2_eps; 220*cefb98e8SMark Adams PetscReal fact, u1u2, diff[3], udiff2,u12,u22,wsq,rsq, tt; 221*cefb98e8SMark Adams PetscInt i,j; 222*cefb98e8SMark Adams 223*cefb98e8SMark Adams if (mask==0.0) { 224*cefb98e8SMark Adams for (i = 0; i < 3; ++i) { 225*cefb98e8SMark Adams for (j = 0; j < 3; ++j) { 226*cefb98e8SMark Adams U[i][j] = 0; 227*cefb98e8SMark Adams } 228*cefb98e8SMark Adams } 229*cefb98e8SMark Adams } else { 230*cefb98e8SMark Adams for (i = 0, u1u2 = u12 = u22 = udiff2 = 0; i < 3; ++i) { 231*cefb98e8SMark Adams diff[i] = x1[i] - x2[i]; 232*cefb98e8SMark Adams udiff2 += diff[i] * diff[i]; 233*cefb98e8SMark Adams u12 += x1[i]*x1[i]; 234*cefb98e8SMark Adams u22 += x2[i]*x2[i]; 235*cefb98e8SMark Adams u1u2 += x1[i]*x2[i]; 236*cefb98e8SMark Adams } 237*cefb98e8SMark Adams tt = 2.*u1u2*(1.-g1*g2) + (u12*u22 + u1u2*u1u2)/c02; // these two terms are about the same with opposite sign 238*cefb98e8SMark Adams wsq = udiff2 + tt; 239*cefb98e8SMark Adams //wsq = udiff2 + 2.*u1u2*(1.-g1*g2) + (u12*u22 + u1u2*u1u2)/c02; 240*cefb98e8SMark Adams rsq = 1.+wsq/c02; 241*cefb98e8SMark Adams fact = -rsq/(g1*g2*PetscSqrtReal(wsq)); /* flip sign. papers use du/dt = C, PETSc uses form G(u) = du/dt - C(u) = 0 */ 242*cefb98e8SMark Adams for (i = 0; i < 3; ++i) { 243*cefb98e8SMark Adams for (j = 0; j < 3; ++j) { 244*cefb98e8SMark Adams U[i][j] = fact * ( -diff[i]*diff[j]/wsq + (PetscSqrtReal(rsq)-1.)*(x1[i]*x2[j] + x1[j]*x2[i])/wsq); 245*cefb98e8SMark Adams } 246*cefb98e8SMark Adams U[i][i] += fact; 247*cefb98e8SMark Adams } 248*cefb98e8SMark Adams #if defined(PETSC_USE_DEBUG) 249*cefb98e8SMark Adams { 250*cefb98e8SMark Adams PetscReal diff_g[3], udiff = sqrt(udiff2), err, err2; 251*cefb98e8SMark Adams for (i = 0; i < 3; ++i) diff_g[i] = x1[i]/g1 - x2[i]/g2; 252*cefb98e8SMark Adams for (i = 0, err = 0; i < 3; ++i) { 253*cefb98e8SMark Adams double tmp=0; 254*cefb98e8SMark Adams for (j = 0; j < 3; ++j) { 255*cefb98e8SMark Adams tmp += U[i][j]*diff_g[j]; 256*cefb98e8SMark Adams } 257*cefb98e8SMark Adams err += tmp * tmp; 258*cefb98e8SMark Adams } 259*cefb98e8SMark Adams err = sqrt(err); 260*cefb98e8SMark Adams err2 = udiff2*(err)/(g1*g2); 261*cefb98e8SMark Adams #if defined(PETSC_USE_REAL_SINGLE) 262*cefb98e8SMark Adams if (err>1.e-6 || err!=err) exit(11); 263*cefb98e8SMark Adams #else 264*cefb98e8SMark Adams if (err>1.e-13 || err!=err) exit(12); 265*cefb98e8SMark Adams #endif 266*cefb98e8SMark Adams } 267*cefb98e8SMark Adams #endif 268*cefb98e8SMark Adams } 269*cefb98e8SMark Adams } 270*cefb98e8SMark Adams 271e0eea495SMark #endif 272