1*ed264d09SValeria Barra // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2*ed264d09SValeria Barra // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3*ed264d09SValeria Barra // reserved. See files LICENSE and NOTICE for details. 4*ed264d09SValeria Barra // 5*ed264d09SValeria Barra // This file is part of CEED, a collection of benchmarks, miniapps, software 6*ed264d09SValeria Barra // libraries and APIs for efficient high-order finite element and spectral 7*ed264d09SValeria Barra // element discretizations for exascale applications. For more information and 8*ed264d09SValeria Barra // source code availability see http://github.com/ceed. 9*ed264d09SValeria Barra // 10*ed264d09SValeria Barra // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11*ed264d09SValeria Barra // a collaborative effort of two U.S. Department of Energy organizations (Office 12*ed264d09SValeria Barra // of Science and the National Nuclear Security Administration) responsible for 13*ed264d09SValeria Barra // the planning and preparation of a capable exascale ecosystem, including 14*ed264d09SValeria Barra // software, applications, hardware, advanced system engineering and early 15*ed264d09SValeria Barra // testbed platforms, in support of the nation's exascale computing imperative. 16*ed264d09SValeria Barra 17*ed264d09SValeria Barra /// @file 18*ed264d09SValeria Barra /// libCEED QFunctions for diffusion operator example for a scalar field on the sphere using PETSc 19*ed264d09SValeria Barra 20*ed264d09SValeria Barra #ifndef __CUDACC__ 21*ed264d09SValeria Barra # include <math.h> 22*ed264d09SValeria Barra #endif 23*ed264d09SValeria Barra 24*ed264d09SValeria Barra // ***************************************************************************** 25*ed264d09SValeria Barra // This QFunction sets up the geometric factors required for integration and 26*ed264d09SValeria Barra // coordinate transformations when reference coordinates have a different 27*ed264d09SValeria Barra // dimension than the one of physical coordinates 28*ed264d09SValeria Barra // 29*ed264d09SValeria Barra // Reference (parent) 2D coordinates: X \in [-1, 1]^2 30*ed264d09SValeria Barra // 31*ed264d09SValeria Barra // Global 3D physical coordinates given by the mesh: xx \in [-R, R]^3 32*ed264d09SValeria Barra // with R radius of the sphere 33*ed264d09SValeria Barra // 34*ed264d09SValeria Barra // Local 3D physical coordinates on the 2D manifold: x \in [-l, l]^3 35*ed264d09SValeria Barra // with l half edge of the cube inscribed in the sphere 36*ed264d09SValeria Barra // 37*ed264d09SValeria Barra // Change of coordinates matrix computed by the library: 38*ed264d09SValeria Barra // (physical 3D coords relative to reference 2D coords) 39*ed264d09SValeria Barra // dxx_j/dX_i (indicial notation) [3 * 2] 40*ed264d09SValeria Barra // 41*ed264d09SValeria Barra // Change of coordinates x (on the 2D manifold) relative to xx (phyisical 3D): 42*ed264d09SValeria Barra // dx_i/dxx_j (indicial notation) [3 * 3] 43*ed264d09SValeria Barra // 44*ed264d09SValeria Barra // Change of coordinates x (on the 2D manifold) relative to X (reference 2D): 45*ed264d09SValeria Barra // (by chain rule) 46*ed264d09SValeria Barra // dx_i/dX_j [3 * 2] = dx_i/dxx_k [3 * 3] * dxx_k/dX_j [3 * 2] 47*ed264d09SValeria Barra // 48*ed264d09SValeria Barra // modJ is given by the magnitude of the cross product of the columns of dx_i/dX_j 49*ed264d09SValeria Barra // 50*ed264d09SValeria Barra // The quadrature data is stored in the array qdata. 51*ed264d09SValeria Barra // 52*ed264d09SValeria Barra // We require the determinant of the Jacobian to properly compute integrals of 53*ed264d09SValeria Barra // the form: int( u v ) 54*ed264d09SValeria Barra // 55*ed264d09SValeria Barra // qdata[0]: modJ * w 56*ed264d09SValeria Barra // 57*ed264d09SValeria Barra // We use the Moore–Penrose (left) pseudoinverse of dx_i/dX_j, to compute dX_i/dx_j (and its transpose), 58*ed264d09SValeria Barra // needed to properly compute integrals of the form: int( gradv gradu ) 59*ed264d09SValeria Barra // 60*ed264d09SValeria Barra // dX_i/dx_j [2 * 3] = (dx_i/dX_j)+ = (dxdX^T dxdX)^(-1) dxdX 61*ed264d09SValeria Barra // 62*ed264d09SValeria Barra // Stored: dX_i/dx_j * dX_j/dx_i (in Voigt convention) 63*ed264d09SValeria Barra // in qdata[1:3] as 64*ed264d09SValeria Barra // [dXdxdXdxT11 dXdxdXdxT12] 65*ed264d09SValeria Barra // [dXdxdXdxT21 dXdxdXdxT22] 66*ed264d09SValeria Barra // ***************************************************************************** 67*ed264d09SValeria Barra 68*ed264d09SValeria Barra // ----------------------------------------------------------------------------- 69*ed264d09SValeria Barra CEED_QFUNCTION(SetupDiffGeo)(void *ctx, CeedInt Q, 70*ed264d09SValeria Barra const CeedScalar *const *in, 71*ed264d09SValeria Barra CeedScalar *const *out) { 72*ed264d09SValeria Barra const CeedScalar *X = in[0], *J = in[1], *w = in[2]; 73*ed264d09SValeria Barra CeedScalar *qdata = out[0]; 74*ed264d09SValeria Barra 75*ed264d09SValeria Barra // Quadrature Point Loop 76*ed264d09SValeria Barra CeedPragmaSIMD 77*ed264d09SValeria Barra for (CeedInt i=0; i<Q; i++) { 78*ed264d09SValeria Barra // Read global Cartesian coordinates 79*ed264d09SValeria Barra const CeedScalar xx[3] = {X[i+0*Q], 80*ed264d09SValeria Barra X[i+1*Q], 81*ed264d09SValeria Barra X[i+2*Q] 82*ed264d09SValeria Barra }; 83*ed264d09SValeria Barra 84*ed264d09SValeria Barra // Read dxxdX Jacobian entries, stored as 85*ed264d09SValeria Barra // 0 3 86*ed264d09SValeria Barra // 1 4 87*ed264d09SValeria Barra // 2 5 88*ed264d09SValeria Barra const CeedScalar dxxdX[3][2] = {{J[i+Q*0], 89*ed264d09SValeria Barra J[i+Q*3]}, 90*ed264d09SValeria Barra {J[i+Q*1], 91*ed264d09SValeria Barra J[i+Q*4]}, 92*ed264d09SValeria Barra {J[i+Q*2], 93*ed264d09SValeria Barra J[i+Q*5]} 94*ed264d09SValeria Barra }; 95*ed264d09SValeria Barra 96*ed264d09SValeria Barra // Setup 97*ed264d09SValeria Barra // x = xx (xx^T xx)^{-1/2} 98*ed264d09SValeria Barra // dx/dxx = I (xx^T xx)^{-1/2} - xx xx^T (xx^T xx)^{-3/2} 99*ed264d09SValeria Barra const CeedScalar modxxsq = xx[0]*xx[0]+xx[1]*xx[1]+xx[2]*xx[2]; 100*ed264d09SValeria Barra CeedScalar xxsq[3][3]; 101*ed264d09SValeria Barra for (int j=0; j<3; j++) 102*ed264d09SValeria Barra for (int k=0; k<3; k++) 103*ed264d09SValeria Barra xxsq[j][k] = xx[j]*xx[k] / (sqrt(modxxsq) * modxxsq); 104*ed264d09SValeria Barra 105*ed264d09SValeria Barra const CeedScalar dxdxx[3][3] = {{1./sqrt(modxxsq) - xxsq[0][0], 106*ed264d09SValeria Barra -xxsq[0][1], 107*ed264d09SValeria Barra -xxsq[0][2]}, 108*ed264d09SValeria Barra {-xxsq[1][0], 109*ed264d09SValeria Barra 1./sqrt(modxxsq) - xxsq[1][1], 110*ed264d09SValeria Barra -xxsq[1][2]}, 111*ed264d09SValeria Barra {-xxsq[2][0], 112*ed264d09SValeria Barra -xxsq[2][1], 113*ed264d09SValeria Barra 1./sqrt(modxxsq) - xxsq[2][2]} 114*ed264d09SValeria Barra }; 115*ed264d09SValeria Barra 116*ed264d09SValeria Barra CeedScalar dxdX[3][2]; 117*ed264d09SValeria Barra for (int j=0; j<3; j++) 118*ed264d09SValeria Barra for (int k=0; k<2; k++) { 119*ed264d09SValeria Barra dxdX[j][k] = 0; 120*ed264d09SValeria Barra for (int l=0; l<3; l++) 121*ed264d09SValeria Barra dxdX[j][k] += dxdxx[j][l]*dxxdX[l][k]; 122*ed264d09SValeria Barra } 123*ed264d09SValeria Barra 124*ed264d09SValeria Barra // J is given by the cross product of the columns of dxdX 125*ed264d09SValeria Barra const CeedScalar J[3]= {dxdX[1][0]*dxdX[2][1] - dxdX[2][0]*dxdX[1][1], 126*ed264d09SValeria Barra dxdX[2][0]*dxdX[0][1] - dxdX[0][0]*dxdX[2][1], 127*ed264d09SValeria Barra dxdX[0][0]*dxdX[1][1] - dxdX[1][0]*dxdX[0][1] 128*ed264d09SValeria Barra }; 129*ed264d09SValeria Barra 130*ed264d09SValeria Barra // Use the magnitude of J as our detJ (volume scaling factor) 131*ed264d09SValeria Barra const CeedScalar modJ = sqrt(J[0]*J[0]+J[1]*J[1]+J[2]*J[2]); 132*ed264d09SValeria Barra 133*ed264d09SValeria Barra // Interp-to-Interp qdata 134*ed264d09SValeria Barra qdata[i+Q*0] = modJ * w[i]; 135*ed264d09SValeria Barra 136*ed264d09SValeria Barra // dxdX_j,k * dxdX_k,j, needed for the pseudoinverse 137*ed264d09SValeria Barra CeedScalar dxdXTdxdX[2][2]; 138*ed264d09SValeria Barra for (int j=0; j<2; j++) 139*ed264d09SValeria Barra for (int k=0; k<2; k++) { 140*ed264d09SValeria Barra dxdXTdxdX[j][k] = 0; 141*ed264d09SValeria Barra for (int l=0; l<3; l++) 142*ed264d09SValeria Barra dxdXTdxdX[j][k] += dxdX[l][j]*dxdX[l][k]; 143*ed264d09SValeria Barra } 144*ed264d09SValeria Barra 145*ed264d09SValeria Barra const CeedScalar detdxdXTdxdX = dxdXTdxdX[0][0] * dxdXTdxdX[1][1] 146*ed264d09SValeria Barra -dxdXTdxdX[1][0] * dxdXTdxdX[0][1]; 147*ed264d09SValeria Barra 148*ed264d09SValeria Barra // Compute inverse of dxdXTdxdX, needed for the pseudoinverse 149*ed264d09SValeria Barra CeedScalar dxdXTdxdXinv[2][2]; 150*ed264d09SValeria Barra dxdXTdxdXinv[0][0] = dxdXTdxdX[1][1] / detdxdXTdxdX; 151*ed264d09SValeria Barra dxdXTdxdXinv[0][1] = -dxdXTdxdX[0][1] / detdxdXTdxdX; 152*ed264d09SValeria Barra dxdXTdxdXinv[1][0] = -dxdXTdxdX[1][0] / detdxdXTdxdX; 153*ed264d09SValeria Barra dxdXTdxdXinv[1][1] = dxdXTdxdX[0][0] / detdxdXTdxdX; 154*ed264d09SValeria Barra 155*ed264d09SValeria Barra // Compute the pseudo inverse of dxdX 156*ed264d09SValeria Barra CeedScalar pseudodXdx[2][3]; 157*ed264d09SValeria Barra for (int j=0; j<2; j++) 158*ed264d09SValeria Barra for (int k=0; k<3; k++) { 159*ed264d09SValeria Barra pseudodXdx[j][k] = 0; 160*ed264d09SValeria Barra for (int l=0; l<2; l++) 161*ed264d09SValeria Barra pseudodXdx[j][k] += dxdXTdxdXinv[j][l]*dxdX[k][l]; 162*ed264d09SValeria Barra } 163*ed264d09SValeria Barra 164*ed264d09SValeria Barra // Grad-to-Grad qdata is given by pseudodXdx * pseudodXdxT 165*ed264d09SValeria Barra CeedScalar dXdxdXdxT[2][2]; 166*ed264d09SValeria Barra for (int j=0; j<2; j++) 167*ed264d09SValeria Barra for (int k=0; k<2; k++) { 168*ed264d09SValeria Barra dXdxdXdxT[j][k] = 0; 169*ed264d09SValeria Barra for (int l=0; l<3; l++) 170*ed264d09SValeria Barra dXdxdXdxT[j][k] += pseudodXdx[j][l]*pseudodXdx[k][l]; 171*ed264d09SValeria Barra } 172*ed264d09SValeria Barra 173*ed264d09SValeria Barra // Stored in Voigt convention 174*ed264d09SValeria Barra qdata[i+Q*1] = dXdxdXdxT[0][0]; 175*ed264d09SValeria Barra qdata[i+Q*2] = dXdxdXdxT[1][1]; 176*ed264d09SValeria Barra qdata[i+Q*3] = dXdxdXdxT[0][1]; 177*ed264d09SValeria Barra 178*ed264d09SValeria Barra } // End of Quadrature Point Loop 179*ed264d09SValeria Barra 180*ed264d09SValeria Barra // Return 181*ed264d09SValeria Barra return 0; 182*ed264d09SValeria Barra } 183*ed264d09SValeria Barra 184*ed264d09SValeria Barra // ***************************************************************************** 185*ed264d09SValeria Barra // This QFunction sets up the rhs and true solution for the problem 186*ed264d09SValeria Barra // ***************************************************************************** 187*ed264d09SValeria Barra 188*ed264d09SValeria Barra // ----------------------------------------------------------------------------- 189*ed264d09SValeria Barra CEED_QFUNCTION(SetupDiffRhs)(void *ctx, CeedInt Q, 190*ed264d09SValeria Barra const CeedScalar *const *in, 191*ed264d09SValeria Barra CeedScalar *const *out) { 192*ed264d09SValeria Barra // Inputs 193*ed264d09SValeria Barra const CeedScalar *X = in[0], *qdata = in[1]; 194*ed264d09SValeria Barra // Outputs 195*ed264d09SValeria Barra CeedScalar *true_soln = out[0], *rhs = out[1]; 196*ed264d09SValeria Barra 197*ed264d09SValeria Barra // Context 198*ed264d09SValeria Barra const CeedScalar *context = (const CeedScalar*)ctx; 199*ed264d09SValeria Barra const CeedScalar R = context[0]; 200*ed264d09SValeria Barra 201*ed264d09SValeria Barra // Quadrature Point Loop 202*ed264d09SValeria Barra CeedPragmaSIMD 203*ed264d09SValeria Barra for (CeedInt i=0; i<Q; i++) { 204*ed264d09SValeria Barra // Read global Cartesian coordinates 205*ed264d09SValeria Barra CeedScalar x = X[i+Q*0], y = X[i+Q*1], z = X[i+Q*2]; 206*ed264d09SValeria Barra // Normalize quadrature point coordinates to sphere 207*ed264d09SValeria Barra CeedScalar rad = sqrt(x*x + y*y + z*z); 208*ed264d09SValeria Barra x *= R / rad; 209*ed264d09SValeria Barra y *= R / rad; 210*ed264d09SValeria Barra z *= R / rad; 211*ed264d09SValeria Barra // Compute latitude and longitude 212*ed264d09SValeria Barra const CeedScalar theta = asin(z / R); // latitude 213*ed264d09SValeria Barra const CeedScalar lambda = atan2(y, x); // longitude 214*ed264d09SValeria Barra 215*ed264d09SValeria Barra true_soln[i+Q*0] = sin(lambda) * cos(theta); 216*ed264d09SValeria Barra 217*ed264d09SValeria Barra rhs[i+Q*0] = qdata[i+Q*0] * 2 * sin(lambda)*cos(theta) / (R*R); 218*ed264d09SValeria Barra 219*ed264d09SValeria Barra } // End of Quadrature Point Loop 220*ed264d09SValeria Barra 221*ed264d09SValeria Barra return 0; 222*ed264d09SValeria Barra } 223*ed264d09SValeria Barra 224*ed264d09SValeria Barra // ***************************************************************************** 225*ed264d09SValeria Barra // This QFunction applies the diffusion operator for a scalar field. 226*ed264d09SValeria Barra // 227*ed264d09SValeria Barra // Inputs: 228*ed264d09SValeria Barra // ug - Input vector gradient at quadrature points 229*ed264d09SValeria Barra // qdata - Geometric factors 230*ed264d09SValeria Barra // 231*ed264d09SValeria Barra // Output: 232*ed264d09SValeria Barra // vg - Output vector (test functions) gradient at quadrature points 233*ed264d09SValeria Barra // 234*ed264d09SValeria Barra // ***************************************************************************** 235*ed264d09SValeria Barra 236*ed264d09SValeria Barra // ----------------------------------------------------------------------------- 237*ed264d09SValeria Barra CEED_QFUNCTION(Diff)(void *ctx, CeedInt Q, 238*ed264d09SValeria Barra const CeedScalar *const *in, CeedScalar *const *out) { 239*ed264d09SValeria Barra // Inputs 240*ed264d09SValeria Barra const CeedScalar *ug = in[0], *qdata = in[1]; 241*ed264d09SValeria Barra // Outputs 242*ed264d09SValeria Barra CeedScalar *vg = out[0]; 243*ed264d09SValeria Barra 244*ed264d09SValeria Barra // Quadrature Point Loop 245*ed264d09SValeria Barra CeedPragmaSIMD 246*ed264d09SValeria Barra for (CeedInt i=0; i<Q; i++) { 247*ed264d09SValeria Barra // Read spatial derivatives of u 248*ed264d09SValeria Barra const CeedScalar du[2] = {ug[i+Q*0], 249*ed264d09SValeria Barra ug[i+Q*1] 250*ed264d09SValeria Barra }; 251*ed264d09SValeria Barra // Read qdata 252*ed264d09SValeria Barra const CeedScalar wJ = qdata[i+Q*0]; 253*ed264d09SValeria Barra // -- Grad-to-Grad qdata 254*ed264d09SValeria Barra // ---- dXdx_j,k * dXdx_k,j 255*ed264d09SValeria Barra const CeedScalar dXdxdXdxT[2][2] = {{qdata[i+Q*1], 256*ed264d09SValeria Barra qdata[i+Q*3]}, 257*ed264d09SValeria Barra {qdata[i+Q*3], 258*ed264d09SValeria Barra qdata[i+Q*2]} 259*ed264d09SValeria Barra }; 260*ed264d09SValeria Barra 261*ed264d09SValeria Barra for (int j=0; j<2; j++) // j = direction of vg 262*ed264d09SValeria Barra vg[i+j*Q] = wJ * (du[0] * dXdxdXdxT[0][j] + 263*ed264d09SValeria Barra du[1] * dXdxdXdxT[1][j]); 264*ed264d09SValeria Barra 265*ed264d09SValeria Barra } // End of Quadrature Point Loop 266*ed264d09SValeria Barra 267*ed264d09SValeria Barra return 0; 268*ed264d09SValeria Barra } 269*ed264d09SValeria Barra // ----------------------------------------------------------------------------- 270