xref: /libCEED/examples/petsc/qfunctions/bps/bp3sphere.h (revision ed264d09f1c2ca67d20420ee135d5f5156727a4b)
1*ed264d09SValeria Barra // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2*ed264d09SValeria Barra // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3*ed264d09SValeria Barra // reserved. See files LICENSE and NOTICE for details.
4*ed264d09SValeria Barra //
5*ed264d09SValeria Barra // This file is part of CEED, a collection of benchmarks, miniapps, software
6*ed264d09SValeria Barra // libraries and APIs for efficient high-order finite element and spectral
7*ed264d09SValeria Barra // element discretizations for exascale applications. For more information and
8*ed264d09SValeria Barra // source code availability see http://github.com/ceed.
9*ed264d09SValeria Barra //
10*ed264d09SValeria Barra // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11*ed264d09SValeria Barra // a collaborative effort of two U.S. Department of Energy organizations (Office
12*ed264d09SValeria Barra // of Science and the National Nuclear Security Administration) responsible for
13*ed264d09SValeria Barra // the planning and preparation of a capable exascale ecosystem, including
14*ed264d09SValeria Barra // software, applications, hardware, advanced system engineering and early
15*ed264d09SValeria Barra // testbed platforms, in support of the nation's exascale computing imperative.
16*ed264d09SValeria Barra 
17*ed264d09SValeria Barra /// @file
18*ed264d09SValeria Barra /// libCEED QFunctions for diffusion operator example for a scalar field on the sphere using PETSc
19*ed264d09SValeria Barra 
20*ed264d09SValeria Barra #ifndef __CUDACC__
21*ed264d09SValeria Barra #  include <math.h>
22*ed264d09SValeria Barra #endif
23*ed264d09SValeria Barra 
24*ed264d09SValeria Barra // *****************************************************************************
25*ed264d09SValeria Barra // This QFunction sets up the geometric factors required for integration and
26*ed264d09SValeria Barra //   coordinate transformations when reference coordinates have a different
27*ed264d09SValeria Barra //   dimension than the one of physical coordinates
28*ed264d09SValeria Barra //
29*ed264d09SValeria Barra // Reference (parent) 2D coordinates: X \in [-1, 1]^2
30*ed264d09SValeria Barra //
31*ed264d09SValeria Barra // Global 3D physical coordinates given by the mesh: xx \in [-R, R]^3
32*ed264d09SValeria Barra //   with R radius of the sphere
33*ed264d09SValeria Barra //
34*ed264d09SValeria Barra // Local 3D physical coordinates on the 2D manifold: x \in [-l, l]^3
35*ed264d09SValeria Barra //   with l half edge of the cube inscribed in the sphere
36*ed264d09SValeria Barra //
37*ed264d09SValeria Barra // Change of coordinates matrix computed by the library:
38*ed264d09SValeria Barra //   (physical 3D coords relative to reference 2D coords)
39*ed264d09SValeria Barra //   dxx_j/dX_i (indicial notation) [3 * 2]
40*ed264d09SValeria Barra //
41*ed264d09SValeria Barra // Change of coordinates x (on the 2D manifold) relative to xx (phyisical 3D):
42*ed264d09SValeria Barra //   dx_i/dxx_j (indicial notation) [3 * 3]
43*ed264d09SValeria Barra //
44*ed264d09SValeria Barra // Change of coordinates x (on the 2D manifold) relative to X (reference 2D):
45*ed264d09SValeria Barra //   (by chain rule)
46*ed264d09SValeria Barra //   dx_i/dX_j [3 * 2] = dx_i/dxx_k [3 * 3] * dxx_k/dX_j [3 * 2]
47*ed264d09SValeria Barra //
48*ed264d09SValeria Barra // modJ is given by the magnitude of the cross product of the columns of dx_i/dX_j
49*ed264d09SValeria Barra //
50*ed264d09SValeria Barra // The quadrature data is stored in the array qdata.
51*ed264d09SValeria Barra //
52*ed264d09SValeria Barra // We require the determinant of the Jacobian to properly compute integrals of
53*ed264d09SValeria Barra //   the form: int( u v )
54*ed264d09SValeria Barra //
55*ed264d09SValeria Barra // qdata[0]: modJ * w
56*ed264d09SValeria Barra //
57*ed264d09SValeria Barra // We use the Moore–Penrose (left) pseudoinverse of dx_i/dX_j, to compute dX_i/dx_j (and its transpose),
58*ed264d09SValeria Barra //   needed to properly compute integrals of the form: int( gradv gradu )
59*ed264d09SValeria Barra //
60*ed264d09SValeria Barra // dX_i/dx_j [2 * 3] = (dx_i/dX_j)+ = (dxdX^T dxdX)^(-1) dxdX
61*ed264d09SValeria Barra //
62*ed264d09SValeria Barra // Stored: dX_i/dx_j * dX_j/dx_i (in Voigt convention)
63*ed264d09SValeria Barra //   in qdata[1:3] as
64*ed264d09SValeria Barra //   [dXdxdXdxT11 dXdxdXdxT12]
65*ed264d09SValeria Barra //   [dXdxdXdxT21 dXdxdXdxT22]
66*ed264d09SValeria Barra // *****************************************************************************
67*ed264d09SValeria Barra 
68*ed264d09SValeria Barra // -----------------------------------------------------------------------------
69*ed264d09SValeria Barra CEED_QFUNCTION(SetupDiffGeo)(void *ctx, CeedInt Q,
70*ed264d09SValeria Barra                              const CeedScalar *const *in,
71*ed264d09SValeria Barra                              CeedScalar *const *out) {
72*ed264d09SValeria Barra   const CeedScalar *X = in[0], *J = in[1], *w = in[2];
73*ed264d09SValeria Barra   CeedScalar *qdata = out[0];
74*ed264d09SValeria Barra 
75*ed264d09SValeria Barra   // Quadrature Point Loop
76*ed264d09SValeria Barra   CeedPragmaSIMD
77*ed264d09SValeria Barra   for (CeedInt i=0; i<Q; i++) {
78*ed264d09SValeria Barra     // Read global Cartesian coordinates
79*ed264d09SValeria Barra     const CeedScalar xx[3] = {X[i+0*Q],
80*ed264d09SValeria Barra                               X[i+1*Q],
81*ed264d09SValeria Barra                               X[i+2*Q]
82*ed264d09SValeria Barra                              };
83*ed264d09SValeria Barra 
84*ed264d09SValeria Barra     // Read dxxdX Jacobian entries, stored as
85*ed264d09SValeria Barra     // 0 3
86*ed264d09SValeria Barra     // 1 4
87*ed264d09SValeria Barra     // 2 5
88*ed264d09SValeria Barra     const CeedScalar dxxdX[3][2] = {{J[i+Q*0],
89*ed264d09SValeria Barra                                      J[i+Q*3]},
90*ed264d09SValeria Barra                                     {J[i+Q*1],
91*ed264d09SValeria Barra                                      J[i+Q*4]},
92*ed264d09SValeria Barra                                     {J[i+Q*2],
93*ed264d09SValeria Barra                                      J[i+Q*5]}
94*ed264d09SValeria Barra                                    };
95*ed264d09SValeria Barra 
96*ed264d09SValeria Barra     // Setup
97*ed264d09SValeria Barra     // x = xx (xx^T xx)^{-1/2}
98*ed264d09SValeria Barra     // dx/dxx = I (xx^T xx)^{-1/2} - xx xx^T (xx^T xx)^{-3/2}
99*ed264d09SValeria Barra     const CeedScalar modxxsq = xx[0]*xx[0]+xx[1]*xx[1]+xx[2]*xx[2];
100*ed264d09SValeria Barra     CeedScalar xxsq[3][3];
101*ed264d09SValeria Barra     for (int j=0; j<3; j++)
102*ed264d09SValeria Barra       for (int k=0; k<3; k++)
103*ed264d09SValeria Barra         xxsq[j][k] = xx[j]*xx[k] / (sqrt(modxxsq) * modxxsq);
104*ed264d09SValeria Barra 
105*ed264d09SValeria Barra     const CeedScalar dxdxx[3][3] = {{1./sqrt(modxxsq) - xxsq[0][0],
106*ed264d09SValeria Barra                                      -xxsq[0][1],
107*ed264d09SValeria Barra                                      -xxsq[0][2]},
108*ed264d09SValeria Barra                                     {-xxsq[1][0],
109*ed264d09SValeria Barra                                      1./sqrt(modxxsq) - xxsq[1][1],
110*ed264d09SValeria Barra                                      -xxsq[1][2]},
111*ed264d09SValeria Barra                                     {-xxsq[2][0],
112*ed264d09SValeria Barra                                      -xxsq[2][1],
113*ed264d09SValeria Barra                                      1./sqrt(modxxsq) - xxsq[2][2]}
114*ed264d09SValeria Barra                                    };
115*ed264d09SValeria Barra 
116*ed264d09SValeria Barra     CeedScalar dxdX[3][2];
117*ed264d09SValeria Barra     for (int j=0; j<3; j++)
118*ed264d09SValeria Barra       for (int k=0; k<2; k++) {
119*ed264d09SValeria Barra         dxdX[j][k] = 0;
120*ed264d09SValeria Barra         for (int l=0; l<3; l++)
121*ed264d09SValeria Barra           dxdX[j][k] += dxdxx[j][l]*dxxdX[l][k];
122*ed264d09SValeria Barra       }
123*ed264d09SValeria Barra 
124*ed264d09SValeria Barra     // J is given by the cross product of the columns of dxdX
125*ed264d09SValeria Barra     const CeedScalar J[3]= {dxdX[1][0]*dxdX[2][1] - dxdX[2][0]*dxdX[1][1],
126*ed264d09SValeria Barra                             dxdX[2][0]*dxdX[0][1] - dxdX[0][0]*dxdX[2][1],
127*ed264d09SValeria Barra                             dxdX[0][0]*dxdX[1][1] - dxdX[1][0]*dxdX[0][1]
128*ed264d09SValeria Barra                            };
129*ed264d09SValeria Barra 
130*ed264d09SValeria Barra     // Use the magnitude of J as our detJ (volume scaling factor)
131*ed264d09SValeria Barra     const CeedScalar modJ = sqrt(J[0]*J[0]+J[1]*J[1]+J[2]*J[2]);
132*ed264d09SValeria Barra 
133*ed264d09SValeria Barra     // Interp-to-Interp qdata
134*ed264d09SValeria Barra     qdata[i+Q*0] = modJ * w[i];
135*ed264d09SValeria Barra 
136*ed264d09SValeria Barra     // dxdX_j,k * dxdX_k,j, needed for the pseudoinverse
137*ed264d09SValeria Barra     CeedScalar dxdXTdxdX[2][2];
138*ed264d09SValeria Barra     for (int j=0; j<2; j++)
139*ed264d09SValeria Barra       for (int k=0; k<2; k++) {
140*ed264d09SValeria Barra         dxdXTdxdX[j][k] = 0;
141*ed264d09SValeria Barra         for (int l=0; l<3; l++)
142*ed264d09SValeria Barra           dxdXTdxdX[j][k] += dxdX[l][j]*dxdX[l][k];
143*ed264d09SValeria Barra       }
144*ed264d09SValeria Barra 
145*ed264d09SValeria Barra     const CeedScalar detdxdXTdxdX =  dxdXTdxdX[0][0] * dxdXTdxdX[1][1]
146*ed264d09SValeria Barra                                     -dxdXTdxdX[1][0] * dxdXTdxdX[0][1];
147*ed264d09SValeria Barra 
148*ed264d09SValeria Barra     // Compute inverse of dxdXTdxdX, needed for the pseudoinverse
149*ed264d09SValeria Barra     CeedScalar dxdXTdxdXinv[2][2];
150*ed264d09SValeria Barra     dxdXTdxdXinv[0][0] =  dxdXTdxdX[1][1] / detdxdXTdxdX;
151*ed264d09SValeria Barra     dxdXTdxdXinv[0][1] = -dxdXTdxdX[0][1] / detdxdXTdxdX;
152*ed264d09SValeria Barra     dxdXTdxdXinv[1][0] = -dxdXTdxdX[1][0] / detdxdXTdxdX;
153*ed264d09SValeria Barra     dxdXTdxdXinv[1][1] =  dxdXTdxdX[0][0] / detdxdXTdxdX;
154*ed264d09SValeria Barra 
155*ed264d09SValeria Barra     // Compute the pseudo inverse of dxdX
156*ed264d09SValeria Barra     CeedScalar pseudodXdx[2][3];
157*ed264d09SValeria Barra     for (int j=0; j<2; j++)
158*ed264d09SValeria Barra       for (int k=0; k<3; k++) {
159*ed264d09SValeria Barra         pseudodXdx[j][k] = 0;
160*ed264d09SValeria Barra         for (int l=0; l<2; l++)
161*ed264d09SValeria Barra           pseudodXdx[j][k] += dxdXTdxdXinv[j][l]*dxdX[k][l];
162*ed264d09SValeria Barra       }
163*ed264d09SValeria Barra 
164*ed264d09SValeria Barra     // Grad-to-Grad qdata is given by pseudodXdx * pseudodXdxT
165*ed264d09SValeria Barra     CeedScalar dXdxdXdxT[2][2];
166*ed264d09SValeria Barra     for (int j=0; j<2; j++)
167*ed264d09SValeria Barra       for (int k=0; k<2; k++) {
168*ed264d09SValeria Barra         dXdxdXdxT[j][k] = 0;
169*ed264d09SValeria Barra         for (int l=0; l<3; l++)
170*ed264d09SValeria Barra           dXdxdXdxT[j][k] += pseudodXdx[j][l]*pseudodXdx[k][l];
171*ed264d09SValeria Barra       }
172*ed264d09SValeria Barra 
173*ed264d09SValeria Barra     // Stored in Voigt convention
174*ed264d09SValeria Barra     qdata[i+Q*1] = dXdxdXdxT[0][0];
175*ed264d09SValeria Barra     qdata[i+Q*2] = dXdxdXdxT[1][1];
176*ed264d09SValeria Barra     qdata[i+Q*3] = dXdxdXdxT[0][1];
177*ed264d09SValeria Barra 
178*ed264d09SValeria Barra   } // End of Quadrature Point Loop
179*ed264d09SValeria Barra 
180*ed264d09SValeria Barra   // Return
181*ed264d09SValeria Barra   return 0;
182*ed264d09SValeria Barra }
183*ed264d09SValeria Barra 
184*ed264d09SValeria Barra // *****************************************************************************
185*ed264d09SValeria Barra // This QFunction sets up the rhs and true solution for the problem
186*ed264d09SValeria Barra // *****************************************************************************
187*ed264d09SValeria Barra 
188*ed264d09SValeria Barra // -----------------------------------------------------------------------------
189*ed264d09SValeria Barra CEED_QFUNCTION(SetupDiffRhs)(void *ctx, CeedInt Q,
190*ed264d09SValeria Barra                              const CeedScalar *const *in,
191*ed264d09SValeria Barra                              CeedScalar *const *out) {
192*ed264d09SValeria Barra   // Inputs
193*ed264d09SValeria Barra   const CeedScalar *X = in[0], *qdata = in[1];
194*ed264d09SValeria Barra   // Outputs
195*ed264d09SValeria Barra   CeedScalar *true_soln = out[0], *rhs = out[1];
196*ed264d09SValeria Barra 
197*ed264d09SValeria Barra   // Context
198*ed264d09SValeria Barra   const CeedScalar *context = (const CeedScalar*)ctx;
199*ed264d09SValeria Barra   const CeedScalar R        = context[0];
200*ed264d09SValeria Barra 
201*ed264d09SValeria Barra   // Quadrature Point Loop
202*ed264d09SValeria Barra   CeedPragmaSIMD
203*ed264d09SValeria Barra   for (CeedInt i=0; i<Q; i++) {
204*ed264d09SValeria Barra     // Read global Cartesian coordinates
205*ed264d09SValeria Barra     CeedScalar x = X[i+Q*0], y = X[i+Q*1], z = X[i+Q*2];
206*ed264d09SValeria Barra     // Normalize quadrature point coordinates to sphere
207*ed264d09SValeria Barra     CeedScalar rad = sqrt(x*x + y*y + z*z);
208*ed264d09SValeria Barra     x *= R / rad;
209*ed264d09SValeria Barra     y *= R / rad;
210*ed264d09SValeria Barra     z *= R / rad;
211*ed264d09SValeria Barra     // Compute latitude and longitude
212*ed264d09SValeria Barra     const CeedScalar theta  = asin(z / R); // latitude
213*ed264d09SValeria Barra     const CeedScalar lambda = atan2(y, x); // longitude
214*ed264d09SValeria Barra 
215*ed264d09SValeria Barra     true_soln[i+Q*0] = sin(lambda) * cos(theta);
216*ed264d09SValeria Barra 
217*ed264d09SValeria Barra     rhs[i+Q*0] = qdata[i+Q*0] * 2 * sin(lambda)*cos(theta) / (R*R);
218*ed264d09SValeria Barra 
219*ed264d09SValeria Barra   } // End of Quadrature Point Loop
220*ed264d09SValeria Barra 
221*ed264d09SValeria Barra   return 0;
222*ed264d09SValeria Barra }
223*ed264d09SValeria Barra 
224*ed264d09SValeria Barra // *****************************************************************************
225*ed264d09SValeria Barra // This QFunction applies the diffusion operator for a scalar field.
226*ed264d09SValeria Barra //
227*ed264d09SValeria Barra // Inputs:
228*ed264d09SValeria Barra //   ug     - Input vector gradient at quadrature points
229*ed264d09SValeria Barra //   qdata  - Geometric factors
230*ed264d09SValeria Barra //
231*ed264d09SValeria Barra // Output:
232*ed264d09SValeria Barra //   vg     - Output vector (test functions) gradient at quadrature points
233*ed264d09SValeria Barra //
234*ed264d09SValeria Barra // *****************************************************************************
235*ed264d09SValeria Barra 
236*ed264d09SValeria Barra // -----------------------------------------------------------------------------
237*ed264d09SValeria Barra CEED_QFUNCTION(Diff)(void *ctx, CeedInt Q,
238*ed264d09SValeria Barra                      const CeedScalar *const *in, CeedScalar *const *out) {
239*ed264d09SValeria Barra   // Inputs
240*ed264d09SValeria Barra   const CeedScalar *ug = in[0], *qdata = in[1];
241*ed264d09SValeria Barra   // Outputs
242*ed264d09SValeria Barra   CeedScalar *vg = out[0];
243*ed264d09SValeria Barra 
244*ed264d09SValeria Barra   // Quadrature Point Loop
245*ed264d09SValeria Barra   CeedPragmaSIMD
246*ed264d09SValeria Barra   for (CeedInt i=0; i<Q; i++) {
247*ed264d09SValeria Barra     // Read spatial derivatives of u
248*ed264d09SValeria Barra     const CeedScalar du[2]           =  {ug[i+Q*0],
249*ed264d09SValeria Barra                                          ug[i+Q*1]
250*ed264d09SValeria Barra                                         };
251*ed264d09SValeria Barra     // Read qdata
252*ed264d09SValeria Barra     const CeedScalar wJ              =   qdata[i+Q*0];
253*ed264d09SValeria Barra     // -- Grad-to-Grad qdata
254*ed264d09SValeria Barra     // ---- dXdx_j,k * dXdx_k,j
255*ed264d09SValeria Barra     const CeedScalar dXdxdXdxT[2][2] = {{qdata[i+Q*1],
256*ed264d09SValeria Barra                                          qdata[i+Q*3]},
257*ed264d09SValeria Barra                                         {qdata[i+Q*3],
258*ed264d09SValeria Barra                                          qdata[i+Q*2]}
259*ed264d09SValeria Barra                                        };
260*ed264d09SValeria Barra 
261*ed264d09SValeria Barra     for (int j=0; j<2; j++) // j = direction of vg
262*ed264d09SValeria Barra       vg[i+j*Q] = wJ * (du[0] * dXdxdXdxT[0][j] +
263*ed264d09SValeria Barra                         du[1] * dXdxdXdxT[1][j]);
264*ed264d09SValeria Barra 
265*ed264d09SValeria Barra   } // End of Quadrature Point Loop
266*ed264d09SValeria Barra 
267*ed264d09SValeria Barra   return 0;
268*ed264d09SValeria Barra }
269*ed264d09SValeria Barra // -----------------------------------------------------------------------------
270