1 #define LANDAU_INVSQRT(q) (1./PetscSqrtReal(q)) 2 #define LANDAU_SQRT(q) PetscSqrtReal(q) 3 4 #if defined(__CUDA_ARCH__) 5 #define PETSC_DEVICE_FUNC_DECL __device__ 6 #elif defined(KOKKOS_INLINE_FUNCTION) 7 #define PETSC_DEVICE_FUNC_DECL KOKKOS_INLINE_FUNCTION 8 #else 9 #define PETSC_DEVICE_FUNC_DECL static 10 #endif 11 12 #if LANDAU_DIM==2 13 /* elliptic functions 14 */ 15 PETSC_DEVICE_FUNC_DECL PetscReal polevl_10(PetscReal x, const PetscReal coef[]) 16 { 17 PetscReal ans; 18 PetscInt i; 19 ans = coef[0]; 20 for (i=1; i<11; i++) ans = ans * x + coef[i]; 21 return(ans); 22 } 23 PETSC_DEVICE_FUNC_DECL PetscReal polevl_9(PetscReal x, const PetscReal coef[]) 24 { 25 PetscReal ans; 26 PetscInt i; 27 ans = coef[0]; 28 for (i=1; i<10; i++) ans = ans * x + coef[i]; 29 return(ans); 30 } 31 /* 32 * Complete elliptic integral of the second kind 33 */ 34 PETSC_DEVICE_FUNC_DECL void ellipticE(PetscReal x,PetscReal *ret) 35 { 36 #if defined(PETSC_USE_REAL_SINGLE) 37 static const PetscReal P2[] = { 38 1.53552577301013293365E-4F, 39 2.50888492163602060990E-3F, 40 8.68786816565889628429E-3F, 41 1.07350949056076193403E-2F, 42 7.77395492516787092951E-3F, 43 7.58395289413514708519E-3F, 44 1.15688436810574127319E-2F, 45 2.18317996015557253103E-2F, 46 5.68051945617860553470E-2F, 47 4.43147180560990850618E-1F, 48 1.00000000000000000299E0F 49 }; 50 static const PetscReal Q2[] = { 51 3.27954898576485872656E-5F, 52 1.00962792679356715133E-3F, 53 6.50609489976927491433E-3F, 54 1.68862163993311317300E-2F, 55 2.61769742454493659583E-2F, 56 3.34833904888224918614E-2F, 57 4.27180926518931511717E-2F, 58 5.85936634471101055642E-2F, 59 9.37499997197644278445E-2F, 60 2.49999999999888314361E-1F 61 }; 62 #else 63 static const PetscReal P2[] = { 64 1.53552577301013293365E-4, 65 2.50888492163602060990E-3, 66 8.68786816565889628429E-3, 67 1.07350949056076193403E-2, 68 7.77395492516787092951E-3, 69 7.58395289413514708519E-3, 70 1.15688436810574127319E-2, 71 2.18317996015557253103E-2, 72 5.68051945617860553470E-2, 73 4.43147180560990850618E-1, 74 1.00000000000000000299E0 75 }; 76 static const PetscReal Q2[] = { 77 3.27954898576485872656E-5, 78 1.00962792679356715133E-3, 79 6.50609489976927491433E-3, 80 1.68862163993311317300E-2, 81 2.61769742454493659583E-2, 82 3.34833904888224918614E-2, 83 4.27180926518931511717E-2, 84 5.85936634471101055642E-2, 85 9.37499997197644278445E-2, 86 2.49999999999888314361E-1 87 }; 88 #endif 89 x = 1 - x; /* where m = 1 - m1 */ 90 *ret = polevl_10(x,P2) - PetscLogReal(x) * (x * polevl_9(x,Q2)); 91 } 92 /* 93 * Complete elliptic integral of the first kind 94 */ 95 PETSC_DEVICE_FUNC_DECL void ellipticK(PetscReal x,PetscReal *ret) 96 { 97 #if defined(PETSC_USE_REAL_SINGLE) 98 static const PetscReal P1[] = 99 { 100 1.37982864606273237150E-4F, 101 2.28025724005875567385E-3F, 102 7.97404013220415179367E-3F, 103 9.85821379021226008714E-3F, 104 6.87489687449949877925E-3F, 105 6.18901033637687613229E-3F, 106 8.79078273952743772254E-3F, 107 1.49380448916805252718E-2F, 108 3.08851465246711995998E-2F, 109 9.65735902811690126535E-2F, 110 1.38629436111989062502E0F 111 }; 112 static const PetscReal Q1[] = 113 { 114 2.94078955048598507511E-5F, 115 9.14184723865917226571E-4F, 116 5.94058303753167793257E-3F, 117 1.54850516649762399335E-2F, 118 2.39089602715924892727E-2F, 119 3.01204715227604046988E-2F, 120 3.73774314173823228969E-2F, 121 4.88280347570998239232E-2F, 122 7.03124996963957469739E-2F, 123 1.24999999999870820058E-1F, 124 4.99999999999999999821E-1F 125 }; 126 #else 127 static const PetscReal P1[] = 128 { 129 1.37982864606273237150E-4, 130 2.28025724005875567385E-3, 131 7.97404013220415179367E-3, 132 9.85821379021226008714E-3, 133 6.87489687449949877925E-3, 134 6.18901033637687613229E-3, 135 8.79078273952743772254E-3, 136 1.49380448916805252718E-2, 137 3.08851465246711995998E-2, 138 9.65735902811690126535E-2, 139 1.38629436111989062502E0 140 }; 141 static const PetscReal Q1[] = 142 { 143 2.94078955048598507511E-5, 144 9.14184723865917226571E-4, 145 5.94058303753167793257E-3, 146 1.54850516649762399335E-2, 147 2.39089602715924892727E-2, 148 3.01204715227604046988E-2, 149 3.73774314173823228969E-2, 150 4.88280347570998239232E-2, 151 7.03124996963957469739E-2, 152 1.24999999999870820058E-1, 153 4.99999999999999999821E-1 154 }; 155 #endif 156 x = 1 - x; /* where m = 1 - m1 */ 157 *ret = polevl_10(x,P1) - PetscLogReal(x) * polevl_10(x,Q1); 158 } 159 /* flip sign. papers use du/dt = C, PETSc uses form G(u) = du/dt - C(u) = 0 */ 160 PETSC_DEVICE_FUNC_DECL void LandauTensor2D(const PetscReal x[], const PetscReal rp, const PetscReal zp, PetscReal Ud[][2], PetscReal Uk[][2], const PetscReal mask) 161 { 162 PetscReal l,s,r=x[0],z=x[1],i1func,i2func,i3func,ks,es,pi4pow,sqrt_1s,r2,rp2,r2prp2,zmzp,zmzp2,tt; 163 //PetscReal mask /* = !!(r!=rp || z!=zp) */; 164 /* !!(zmzp2 > 1.e-12 || (r-rp) > 1.e-12 || (r-rp) < -1.e-12); */ 165 r2=PetscSqr(r); 166 zmzp=z-zp; 167 rp2=PetscSqr(rp); 168 zmzp2=PetscSqr(zmzp); 169 r2prp2=r2+rp2; 170 l = r2 + rp2 + zmzp2; 171 /* if (zmzp2 > PETSC_SMALL) mask = 1; */ 172 /* else if ((tt=(r-rp)) > PETSC_SMALL) mask = 1; */ 173 /* else if (tt < -PETSC_SMALL) mask = 1; */ 174 /* else mask = 0; */ 175 s = mask*2*r*rp/l; /* mask for vectorization */ 176 tt = 1./(1+s); 177 pi4pow = 4*PETSC_PI*LANDAU_INVSQRT(PetscSqr(l)*l); 178 sqrt_1s = LANDAU_SQRT(1.+s); 179 /* sp.ellipe(2.*s/(1.+s)) */ 180 ellipticE(2*s*tt,&es); /* 44 flops * 2 + 75 = 163 flops including 2 logs, 1 sqrt, 1 pow, 21 mult */ 181 /* sp.ellipk(2.*s/(1.+s)) */ 182 ellipticK(2*s*tt,&ks); /* 44 flops + 75 in rest, 21 mult */ 183 /* mask is needed here just for single precision */ 184 i2func = 2./((1-s)*sqrt_1s) * es; 185 i1func = 4./(PetscSqr(s)*sqrt_1s + PETSC_MACHINE_EPSILON) * mask * (ks - (1.+s) * es); 186 i3func = 2./((1-s)*(s)*sqrt_1s + PETSC_MACHINE_EPSILON) * (es - (1-s) * ks); 187 Ud[0][0]= -pi4pow*(rp2*i1func+PetscSqr(zmzp)*i2func); 188 Ud[0][1]=Ud[1][0]=Uk[0][1]= pi4pow*(zmzp)*(r*i2func-rp*i3func); 189 Uk[1][1]=Ud[1][1]= -pi4pow*((r2prp2)*i2func-2*r*rp*i3func)*mask; 190 Uk[0][0]= -pi4pow*(zmzp2*i3func+r*rp*i1func); 191 Uk[1][0]= pi4pow*(zmzp)*(r*i3func-rp*i2func); /* 48 mults + 21 + 21 = 90 mults and divs */ 192 } 193 #else 194 /* integration point functions */ 195 /* Evaluates the tensor U=(I-(x-y)(x-y)/(x-y)^2)/|x-y| at point x,y */ 196 /* if x==y we will return zero. This is not the correct result */ 197 /* since the tensor diverges for x==y but when integrated */ 198 /* the divergent part is antisymmetric and vanishes. This is not */ 199 /* trivial, but can be proven. */ 200 PETSC_DEVICE_FUNC_DECL void LandauTensor3D(const PetscReal x1[], const PetscReal xp, const PetscReal yp, const PetscReal zp, PetscReal U[][3], PetscReal mask) 201 { 202 PetscReal dx[3],inorm3,inorm,inorm2,norm2,x2[] = {xp,yp,zp}; 203 PetscInt d; 204 for (d = 0, norm2 = PETSC_MACHINE_EPSILON; d < 3; ++d) { 205 dx[d] = x2[d] - x1[d]; 206 norm2 += dx[d] * dx[d]; 207 } 208 inorm2 = mask/norm2; 209 inorm = LANDAU_SQRT(inorm2); 210 inorm3 = inorm2*inorm; 211 for (d = 0; d < 3; ++d) U[d][d] = -(inorm - inorm3 * dx[d] * dx[d]); 212 U[1][0] = U[0][1] = inorm3 * dx[0] * dx[1]; 213 U[1][2] = U[2][1] = inorm3 * dx[2] * dx[1]; 214 U[2][0] = U[0][2] = inorm3 * dx[0] * dx[2]; 215 } 216 #endif 217