xref: /petsc/src/dm/dt/interface/dt.c (revision 07218a29b4d399ea5732e51342b017bba1a66494)
1 /* Discretization tools */
2 
3 #include <petscdt.h> /*I "petscdt.h" I*/
4 #include <petscblaslapack.h>
5 #include <petsc/private/petscimpl.h>
6 #include <petsc/private/dtimpl.h>
7 #include <petsc/private/petscfeimpl.h> /* For CoordinatesRefToReal() */
8 #include <petscviewer.h>
9 #include <petscdmplex.h>
10 #include <petscdmshell.h>
11 
12 #if defined(PETSC_HAVE_MPFR)
13   #include <mpfr.h>
14 #endif
15 
16 const char *const        PetscDTNodeTypes_shifted[] = {"default", "gaussjacobi", "equispaced", "tanhsinh", "PETSCDTNODES_", NULL};
17 const char *const *const PetscDTNodeTypes           = PetscDTNodeTypes_shifted + 1;
18 
19 const char *const        PetscDTSimplexQuadratureTypes_shifted[] = {"default", "conic", "minsym", "PETSCDTSIMPLEXQUAD_", NULL};
20 const char *const *const PetscDTSimplexQuadratureTypes           = PetscDTSimplexQuadratureTypes_shifted + 1;
21 
22 static PetscBool GolubWelschCite       = PETSC_FALSE;
23 const char       GolubWelschCitation[] = "@article{GolubWelsch1969,\n"
24                                          "  author  = {Golub and Welsch},\n"
25                                          "  title   = {Calculation of Quadrature Rules},\n"
26                                          "  journal = {Math. Comp.},\n"
27                                          "  volume  = {23},\n"
28                                          "  number  = {106},\n"
29                                          "  pages   = {221--230},\n"
30                                          "  year    = {1969}\n}\n";
31 
32 /* Numerical tests in src/dm/dt/tests/ex1.c show that when computing the nodes and weights of Gauss-Jacobi
33    quadrature rules:
34 
35    - in double precision, Newton's method and Golub & Welsch both work for moderate degrees (< 100),
36    - in single precision, Newton's method starts producing incorrect roots around n = 15, but
37      the weights from Golub & Welsch become a problem before then: they produces errors
38      in computing the Jacobi-polynomial Gram matrix around n = 6.
39 
40    So we default to Newton's method (required fewer dependencies) */
41 PetscBool PetscDTGaussQuadratureNewton_Internal = PETSC_TRUE;
42 
43 PetscClassId PETSCQUADRATURE_CLASSID = 0;
44 
45 /*@
46   PetscQuadratureCreate - Create a `PetscQuadrature` object
47 
48   Collective
49 
50   Input Parameter:
51 . comm - The communicator for the `PetscQuadrature` object
52 
53   Output Parameter:
54 . q  - The `PetscQuadrature` object
55 
56   Level: beginner
57 
58 .seealso: `PetscQuadrature`, `Petscquadraturedestroy()`, `PetscQuadratureGetData()`
59 @*/
60 PetscErrorCode PetscQuadratureCreate(MPI_Comm comm, PetscQuadrature *q)
61 {
62   PetscFunctionBegin;
63   PetscValidPointer(q, 2);
64   PetscCall(DMInitializePackage());
65   PetscCall(PetscHeaderCreate(*q, PETSCQUADRATURE_CLASSID, "PetscQuadrature", "Quadrature", "DT", comm, PetscQuadratureDestroy, PetscQuadratureView));
66   (*q)->dim       = -1;
67   (*q)->Nc        = 1;
68   (*q)->order     = -1;
69   (*q)->numPoints = 0;
70   (*q)->points    = NULL;
71   (*q)->weights   = NULL;
72   PetscFunctionReturn(PETSC_SUCCESS);
73 }
74 
75 /*@
76   PetscQuadratureDuplicate - Create a deep copy of the `PetscQuadrature` object
77 
78   Collective
79 
80   Input Parameter:
81 . q  - The `PetscQuadrature` object
82 
83   Output Parameter:
84 . r  - The new `PetscQuadrature` object
85 
86   Level: beginner
87 
88 .seealso: `PetscQuadrature`, `PetscQuadratureCreate()`, `PetscQuadratureDestroy()`, `PetscQuadratureGetData()`
89 @*/
90 PetscErrorCode PetscQuadratureDuplicate(PetscQuadrature q, PetscQuadrature *r)
91 {
92   PetscInt         order, dim, Nc, Nq;
93   const PetscReal *points, *weights;
94   PetscReal       *p, *w;
95 
96   PetscFunctionBegin;
97   PetscValidPointer(q, 1);
98   PetscCall(PetscQuadratureCreate(PetscObjectComm((PetscObject)q), r));
99   PetscCall(PetscQuadratureGetOrder(q, &order));
100   PetscCall(PetscQuadratureSetOrder(*r, order));
101   PetscCall(PetscQuadratureGetData(q, &dim, &Nc, &Nq, &points, &weights));
102   PetscCall(PetscMalloc1(Nq * dim, &p));
103   PetscCall(PetscMalloc1(Nq * Nc, &w));
104   PetscCall(PetscArraycpy(p, points, Nq * dim));
105   PetscCall(PetscArraycpy(w, weights, Nc * Nq));
106   PetscCall(PetscQuadratureSetData(*r, dim, Nc, Nq, p, w));
107   PetscFunctionReturn(PETSC_SUCCESS);
108 }
109 
110 /*@
111   PetscQuadratureDestroy - Destroys a `PetscQuadrature` object
112 
113   Collective
114 
115   Input Parameter:
116 . q  - The `PetscQuadrature` object
117 
118   Level: beginner
119 
120 .seealso: `PetscQuadrature`, `PetscQuadratureCreate()`, `PetscQuadratureGetData()`
121 @*/
122 PetscErrorCode PetscQuadratureDestroy(PetscQuadrature *q)
123 {
124   PetscFunctionBegin;
125   if (!*q) PetscFunctionReturn(PETSC_SUCCESS);
126   PetscValidHeaderSpecific((*q), PETSCQUADRATURE_CLASSID, 1);
127   if (--((PetscObject)(*q))->refct > 0) {
128     *q = NULL;
129     PetscFunctionReturn(PETSC_SUCCESS);
130   }
131   PetscCall(PetscFree((*q)->points));
132   PetscCall(PetscFree((*q)->weights));
133   PetscCall(PetscHeaderDestroy(q));
134   PetscFunctionReturn(PETSC_SUCCESS);
135 }
136 
137 /*@
138   PetscQuadratureGetOrder - Return the order of the method in the `PetscQuadrature`
139 
140   Not Collective
141 
142   Input Parameter:
143 . q - The `PetscQuadrature` object
144 
145   Output Parameter:
146 . order - The order of the quadrature, i.e. the highest degree polynomial that is exactly integrated
147 
148   Level: intermediate
149 
150 .seealso: `PetscQuadrature`, `PetscQuadratureSetOrder()`, `PetscQuadratureGetData()`, `PetscQuadratureSetData()`
151 @*/
152 PetscErrorCode PetscQuadratureGetOrder(PetscQuadrature q, PetscInt *order)
153 {
154   PetscFunctionBegin;
155   PetscValidHeaderSpecific(q, PETSCQUADRATURE_CLASSID, 1);
156   PetscValidIntPointer(order, 2);
157   *order = q->order;
158   PetscFunctionReturn(PETSC_SUCCESS);
159 }
160 
161 /*@
162   PetscQuadratureSetOrder - Set the order of the method in the `PetscQuadrature`
163 
164   Not Collective
165 
166   Input Parameters:
167 + q - The `PetscQuadrature` object
168 - order - The order of the quadrature, i.e. the highest degree polynomial that is exactly integrated
169 
170   Level: intermediate
171 
172 .seealso: `PetscQuadrature`, `PetscQuadratureGetOrder()`, `PetscQuadratureGetData()`, `PetscQuadratureSetData()`
173 @*/
174 PetscErrorCode PetscQuadratureSetOrder(PetscQuadrature q, PetscInt order)
175 {
176   PetscFunctionBegin;
177   PetscValidHeaderSpecific(q, PETSCQUADRATURE_CLASSID, 1);
178   q->order = order;
179   PetscFunctionReturn(PETSC_SUCCESS);
180 }
181 
182 /*@
183   PetscQuadratureGetNumComponents - Return the number of components for functions to be integrated
184 
185   Not Collective
186 
187   Input Parameter:
188 . q - The `PetscQuadrature` object
189 
190   Output Parameter:
191 . Nc - The number of components
192 
193   Level: intermediate
194 
195   Note:
196   We are performing an integral int f(x) . w(x) dx, where both f and w (the weight) have Nc components.
197 
198 .seealso: `PetscQuadrature`, `PetscQuadratureSetNumComponents()`, `PetscQuadratureGetData()`, `PetscQuadratureSetData()`
199 @*/
200 PetscErrorCode PetscQuadratureGetNumComponents(PetscQuadrature q, PetscInt *Nc)
201 {
202   PetscFunctionBegin;
203   PetscValidHeaderSpecific(q, PETSCQUADRATURE_CLASSID, 1);
204   PetscValidIntPointer(Nc, 2);
205   *Nc = q->Nc;
206   PetscFunctionReturn(PETSC_SUCCESS);
207 }
208 
209 /*@
210   PetscQuadratureSetNumComponents - Return the number of components for functions to be integrated
211 
212   Not Collective
213 
214   Input Parameters:
215 + q  - The PetscQuadrature object
216 - Nc - The number of components
217 
218   Level: intermediate
219 
220   Note:
221   We are performing an integral int f(x) . w(x) dx, where both f and w (the weight) have Nc components.
222 
223 .seealso: `PetscQuadrature`, `PetscQuadratureGetNumComponents()`, `PetscQuadratureGetData()`, `PetscQuadratureSetData()`
224 @*/
225 PetscErrorCode PetscQuadratureSetNumComponents(PetscQuadrature q, PetscInt Nc)
226 {
227   PetscFunctionBegin;
228   PetscValidHeaderSpecific(q, PETSCQUADRATURE_CLASSID, 1);
229   q->Nc = Nc;
230   PetscFunctionReturn(PETSC_SUCCESS);
231 }
232 
233 /*@C
234   PetscQuadratureGetData - Returns the data defining the `PetscQuadrature`
235 
236   Not Collective
237 
238   Input Parameter:
239 . q  - The `PetscQuadrature` object
240 
241   Output Parameters:
242 + dim - The spatial dimension
243 . Nc - The number of components
244 . npoints - The number of quadrature points
245 . points - The coordinates of each quadrature point
246 - weights - The weight of each quadrature point
247 
248   Level: intermediate
249 
250   Fortran Note:
251   From Fortran you must call `PetscQuadratureRestoreData()` when you are done with the data
252 
253 .seealso: `PetscQuadrature`, `PetscQuadratureCreate()`, `PetscQuadratureSetData()`
254 @*/
255 PetscErrorCode PetscQuadratureGetData(PetscQuadrature q, PetscInt *dim, PetscInt *Nc, PetscInt *npoints, const PetscReal *points[], const PetscReal *weights[])
256 {
257   PetscFunctionBegin;
258   PetscValidHeaderSpecific(q, PETSCQUADRATURE_CLASSID, 1);
259   if (dim) {
260     PetscValidIntPointer(dim, 2);
261     *dim = q->dim;
262   }
263   if (Nc) {
264     PetscValidIntPointer(Nc, 3);
265     *Nc = q->Nc;
266   }
267   if (npoints) {
268     PetscValidIntPointer(npoints, 4);
269     *npoints = q->numPoints;
270   }
271   if (points) {
272     PetscValidPointer(points, 5);
273     *points = q->points;
274   }
275   if (weights) {
276     PetscValidPointer(weights, 6);
277     *weights = q->weights;
278   }
279   PetscFunctionReturn(PETSC_SUCCESS);
280 }
281 
282 /*@
283   PetscQuadratureEqual - determine whether two quadratures are equivalent
284 
285   Input Parameters:
286 + A - A `PetscQuadrature` object
287 - B - Another `PetscQuadrature` object
288 
289   Output Parameters:
290 . equal - `PETSC_TRUE` if the quadratures are the same
291 
292   Level: intermediate
293 
294 .seealso: `PetscQuadrature`, `PetscQuadratureCreate()`
295 @*/
296 PetscErrorCode PetscQuadratureEqual(PetscQuadrature A, PetscQuadrature B, PetscBool *equal)
297 {
298   PetscFunctionBegin;
299   PetscValidHeaderSpecific(A, PETSCQUADRATURE_CLASSID, 1);
300   PetscValidHeaderSpecific(B, PETSCQUADRATURE_CLASSID, 2);
301   PetscValidBoolPointer(equal, 3);
302   *equal = PETSC_FALSE;
303   if (A->dim != B->dim || A->Nc != B->Nc || A->order != B->order || A->numPoints != B->numPoints) PetscFunctionReturn(PETSC_SUCCESS);
304   for (PetscInt i = 0; i < A->numPoints * A->dim; i++) {
305     if (PetscAbsReal(A->points[i] - B->points[i]) > PETSC_SMALL) PetscFunctionReturn(PETSC_SUCCESS);
306   }
307   if (!A->weights && !B->weights) {
308     *equal = PETSC_TRUE;
309     PetscFunctionReturn(PETSC_SUCCESS);
310   }
311   if (A->weights && B->weights) {
312     for (PetscInt i = 0; i < A->numPoints; i++) {
313       if (PetscAbsReal(A->weights[i] - B->weights[i]) > PETSC_SMALL) PetscFunctionReturn(PETSC_SUCCESS);
314     }
315     *equal = PETSC_TRUE;
316   }
317   PetscFunctionReturn(PETSC_SUCCESS);
318 }
319 
320 static PetscErrorCode PetscDTJacobianInverse_Internal(PetscInt m, PetscInt n, const PetscReal J[], PetscReal Jinv[])
321 {
322   PetscScalar *Js, *Jinvs;
323   PetscInt     i, j, k;
324   PetscBLASInt bm, bn, info;
325 
326   PetscFunctionBegin;
327   if (!m || !n) PetscFunctionReturn(PETSC_SUCCESS);
328   PetscCall(PetscBLASIntCast(m, &bm));
329   PetscCall(PetscBLASIntCast(n, &bn));
330 #if defined(PETSC_USE_COMPLEX)
331   PetscCall(PetscMalloc2(m * n, &Js, m * n, &Jinvs));
332   for (i = 0; i < m * n; i++) Js[i] = J[i];
333 #else
334   Js    = (PetscReal *)J;
335   Jinvs = Jinv;
336 #endif
337   if (m == n) {
338     PetscBLASInt *pivots;
339     PetscScalar  *W;
340 
341     PetscCall(PetscMalloc2(m, &pivots, m, &W));
342 
343     PetscCall(PetscArraycpy(Jinvs, Js, m * m));
344     PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&bm, &bm, Jinvs, &bm, pivots, &info));
345     PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error returned from LAPACKgetrf %" PetscInt_FMT, (PetscInt)info);
346     PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&bm, Jinvs, &bm, pivots, W, &bm, &info));
347     PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error returned from LAPACKgetri %" PetscInt_FMT, (PetscInt)info);
348     PetscCall(PetscFree2(pivots, W));
349   } else if (m < n) {
350     PetscScalar  *JJT;
351     PetscBLASInt *pivots;
352     PetscScalar  *W;
353 
354     PetscCall(PetscMalloc1(m * m, &JJT));
355     PetscCall(PetscMalloc2(m, &pivots, m, &W));
356     for (i = 0; i < m; i++) {
357       for (j = 0; j < m; j++) {
358         PetscScalar val = 0.;
359 
360         for (k = 0; k < n; k++) val += Js[i * n + k] * Js[j * n + k];
361         JJT[i * m + j] = val;
362       }
363     }
364 
365     PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&bm, &bm, JJT, &bm, pivots, &info));
366     PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error returned from LAPACKgetrf %" PetscInt_FMT, (PetscInt)info);
367     PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&bm, JJT, &bm, pivots, W, &bm, &info));
368     PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error returned from LAPACKgetri %" PetscInt_FMT, (PetscInt)info);
369     for (i = 0; i < n; i++) {
370       for (j = 0; j < m; j++) {
371         PetscScalar val = 0.;
372 
373         for (k = 0; k < m; k++) val += Js[k * n + i] * JJT[k * m + j];
374         Jinvs[i * m + j] = val;
375       }
376     }
377     PetscCall(PetscFree2(pivots, W));
378     PetscCall(PetscFree(JJT));
379   } else {
380     PetscScalar  *JTJ;
381     PetscBLASInt *pivots;
382     PetscScalar  *W;
383 
384     PetscCall(PetscMalloc1(n * n, &JTJ));
385     PetscCall(PetscMalloc2(n, &pivots, n, &W));
386     for (i = 0; i < n; i++) {
387       for (j = 0; j < n; j++) {
388         PetscScalar val = 0.;
389 
390         for (k = 0; k < m; k++) val += Js[k * n + i] * Js[k * n + j];
391         JTJ[i * n + j] = val;
392       }
393     }
394 
395     PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&bn, &bn, JTJ, &bn, pivots, &info));
396     PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error returned from LAPACKgetrf %" PetscInt_FMT, (PetscInt)info);
397     PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&bn, JTJ, &bn, pivots, W, &bn, &info));
398     PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error returned from LAPACKgetri %" PetscInt_FMT, (PetscInt)info);
399     for (i = 0; i < n; i++) {
400       for (j = 0; j < m; j++) {
401         PetscScalar val = 0.;
402 
403         for (k = 0; k < n; k++) val += JTJ[i * n + k] * Js[j * n + k];
404         Jinvs[i * m + j] = val;
405       }
406     }
407     PetscCall(PetscFree2(pivots, W));
408     PetscCall(PetscFree(JTJ));
409   }
410 #if defined(PETSC_USE_COMPLEX)
411   for (i = 0; i < m * n; i++) Jinv[i] = PetscRealPart(Jinvs[i]);
412   PetscCall(PetscFree2(Js, Jinvs));
413 #endif
414   PetscFunctionReturn(PETSC_SUCCESS);
415 }
416 
417 /*@
418    PetscQuadraturePushForward - Push forward a quadrature functional under an affine transformation.
419 
420    Collective
421 
422    Input Parameters:
423 +  q - the quadrature functional
424 .  imageDim - the dimension of the image of the transformation
425 .  origin - a point in the original space
426 .  originImage - the image of the origin under the transformation
427 .  J - the Jacobian of the image: an [imageDim x dim] matrix in row major order
428 -  formDegree - transform the quadrature weights as k-forms of this form degree (if the number of components is a multiple of (dim choose formDegree), it is assumed that they represent multiple k-forms) [see `PetscDTAltVPullback()` for interpretation of formDegree]
429 
430    Output Parameters:
431 .  Jinvstarq - a quadrature rule where each point is the image of a point in the original quadrature rule, and where the k-form weights have been pulled-back by the pseudoinverse of J to the k-form weights in the image space.
432 
433    Level: intermediate
434 
435    Note:
436    The new quadrature rule will have a different number of components if spaces have different dimensions.  For example, pushing a 2-form forward from a two dimensional space to a three dimensional space changes the number of components from 1 to 3.
437 
438 .seealso: `PetscQuadrature`, `PetscDTAltVPullback()`, `PetscDTAltVPullbackMatrix()`
439 @*/
440 PetscErrorCode PetscQuadraturePushForward(PetscQuadrature q, PetscInt imageDim, const PetscReal origin[], const PetscReal originImage[], const PetscReal J[], PetscInt formDegree, PetscQuadrature *Jinvstarq)
441 {
442   PetscInt         dim, Nc, imageNc, formSize, Ncopies, imageFormSize, Npoints, pt, i, j, c;
443   const PetscReal *points;
444   const PetscReal *weights;
445   PetscReal       *imagePoints, *imageWeights;
446   PetscReal       *Jinv;
447   PetscReal       *Jinvstar;
448 
449   PetscFunctionBegin;
450   PetscValidHeaderSpecific(q, PETSCQUADRATURE_CLASSID, 1);
451   PetscCheck(imageDim >= PetscAbsInt(formDegree), PetscObjectComm((PetscObject)q), PETSC_ERR_ARG_INCOMP, "Cannot represent a %" PetscInt_FMT "-form in %" PetscInt_FMT " dimensions", PetscAbsInt(formDegree), imageDim);
452   PetscCall(PetscQuadratureGetData(q, &dim, &Nc, &Npoints, &points, &weights));
453   PetscCall(PetscDTBinomialInt(dim, PetscAbsInt(formDegree), &formSize));
454   PetscCheck(Nc % formSize == 0, PetscObjectComm((PetscObject)q), PETSC_ERR_ARG_INCOMP, "Number of components %" PetscInt_FMT " is not a multiple of formSize %" PetscInt_FMT, Nc, formSize);
455   Ncopies = Nc / formSize;
456   PetscCall(PetscDTBinomialInt(imageDim, PetscAbsInt(formDegree), &imageFormSize));
457   imageNc = Ncopies * imageFormSize;
458   PetscCall(PetscMalloc1(Npoints * imageDim, &imagePoints));
459   PetscCall(PetscMalloc1(Npoints * imageNc, &imageWeights));
460   PetscCall(PetscMalloc2(imageDim * dim, &Jinv, formSize * imageFormSize, &Jinvstar));
461   PetscCall(PetscDTJacobianInverse_Internal(imageDim, dim, J, Jinv));
462   PetscCall(PetscDTAltVPullbackMatrix(imageDim, dim, Jinv, formDegree, Jinvstar));
463   for (pt = 0; pt < Npoints; pt++) {
464     const PetscReal *point      = &points[pt * dim];
465     PetscReal       *imagePoint = &imagePoints[pt * imageDim];
466 
467     for (i = 0; i < imageDim; i++) {
468       PetscReal val = originImage[i];
469 
470       for (j = 0; j < dim; j++) val += J[i * dim + j] * (point[j] - origin[j]);
471       imagePoint[i] = val;
472     }
473     for (c = 0; c < Ncopies; c++) {
474       const PetscReal *form      = &weights[pt * Nc + c * formSize];
475       PetscReal       *imageForm = &imageWeights[pt * imageNc + c * imageFormSize];
476 
477       for (i = 0; i < imageFormSize; i++) {
478         PetscReal val = 0.;
479 
480         for (j = 0; j < formSize; j++) val += Jinvstar[i * formSize + j] * form[j];
481         imageForm[i] = val;
482       }
483     }
484   }
485   PetscCall(PetscQuadratureCreate(PetscObjectComm((PetscObject)q), Jinvstarq));
486   PetscCall(PetscQuadratureSetData(*Jinvstarq, imageDim, imageNc, Npoints, imagePoints, imageWeights));
487   PetscCall(PetscFree2(Jinv, Jinvstar));
488   PetscFunctionReturn(PETSC_SUCCESS);
489 }
490 
491 /*@C
492   PetscQuadratureSetData - Sets the data defining the quadrature
493 
494   Not Collective
495 
496   Input Parameters:
497 + q  - The `PetscQuadrature` object
498 . dim - The spatial dimension
499 . Nc - The number of components
500 . npoints - The number of quadrature points
501 . points - The coordinates of each quadrature point
502 - weights - The weight of each quadrature point
503 
504   Level: intermediate
505 
506   Note:
507   This routine owns the references to points and weights, so they must be allocated using `PetscMalloc()` and the user should not free them.
508 
509 .seealso: `PetscQuadrature`, `PetscQuadratureCreate()`, `PetscQuadratureGetData()`
510 @*/
511 PetscErrorCode PetscQuadratureSetData(PetscQuadrature q, PetscInt dim, PetscInt Nc, PetscInt npoints, const PetscReal points[], const PetscReal weights[])
512 {
513   PetscFunctionBegin;
514   PetscValidHeaderSpecific(q, PETSCQUADRATURE_CLASSID, 1);
515   if (dim >= 0) q->dim = dim;
516   if (Nc >= 0) q->Nc = Nc;
517   if (npoints >= 0) q->numPoints = npoints;
518   if (points) {
519     PetscValidRealPointer(points, 5);
520     q->points = points;
521   }
522   if (weights) {
523     PetscValidRealPointer(weights, 6);
524     q->weights = weights;
525   }
526   PetscFunctionReturn(PETSC_SUCCESS);
527 }
528 
529 static PetscErrorCode PetscQuadratureView_Ascii(PetscQuadrature quad, PetscViewer v)
530 {
531   PetscInt          q, d, c;
532   PetscViewerFormat format;
533 
534   PetscFunctionBegin;
535   if (quad->Nc > 1) PetscCall(PetscViewerASCIIPrintf(v, "Quadrature of order %" PetscInt_FMT " on %" PetscInt_FMT " points (dim %" PetscInt_FMT ") with %" PetscInt_FMT " components\n", quad->order, quad->numPoints, quad->dim, quad->Nc));
536   else PetscCall(PetscViewerASCIIPrintf(v, "Quadrature of order %" PetscInt_FMT " on %" PetscInt_FMT " points (dim %" PetscInt_FMT ")\n", quad->order, quad->numPoints, quad->dim));
537   PetscCall(PetscViewerGetFormat(v, &format));
538   if (format != PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);
539   for (q = 0; q < quad->numPoints; ++q) {
540     PetscCall(PetscViewerASCIIPrintf(v, "p%" PetscInt_FMT " (", q));
541     PetscCall(PetscViewerASCIIUseTabs(v, PETSC_FALSE));
542     for (d = 0; d < quad->dim; ++d) {
543       if (d) PetscCall(PetscViewerASCIIPrintf(v, ", "));
544       PetscCall(PetscViewerASCIIPrintf(v, "%+g", (double)quad->points[q * quad->dim + d]));
545     }
546     PetscCall(PetscViewerASCIIPrintf(v, ") "));
547     if (quad->Nc > 1) PetscCall(PetscViewerASCIIPrintf(v, "w%" PetscInt_FMT " (", q));
548     for (c = 0; c < quad->Nc; ++c) {
549       if (c) PetscCall(PetscViewerASCIIPrintf(v, ", "));
550       PetscCall(PetscViewerASCIIPrintf(v, "%+g", (double)quad->weights[q * quad->Nc + c]));
551     }
552     if (quad->Nc > 1) PetscCall(PetscViewerASCIIPrintf(v, ")"));
553     PetscCall(PetscViewerASCIIPrintf(v, "\n"));
554     PetscCall(PetscViewerASCIIUseTabs(v, PETSC_TRUE));
555   }
556   PetscFunctionReturn(PETSC_SUCCESS);
557 }
558 
559 /*@C
560   PetscQuadratureView - View a `PetscQuadrature` object
561 
562   Collective
563 
564   Input Parameters:
565 + quad  - The `PetscQuadrature` object
566 - viewer - The `PetscViewer` object
567 
568   Level: beginner
569 
570 .seealso: `PetscQuadrature`, `PetscViewer`, `PetscQuadratureCreate()`, `PetscQuadratureGetData()`
571 @*/
572 PetscErrorCode PetscQuadratureView(PetscQuadrature quad, PetscViewer viewer)
573 {
574   PetscBool iascii;
575 
576   PetscFunctionBegin;
577   PetscValidHeader(quad, 1);
578   if (viewer) PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
579   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)quad), &viewer));
580   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
581   PetscCall(PetscViewerASCIIPushTab(viewer));
582   if (iascii) PetscCall(PetscQuadratureView_Ascii(quad, viewer));
583   PetscCall(PetscViewerASCIIPopTab(viewer));
584   PetscFunctionReturn(PETSC_SUCCESS);
585 }
586 
587 /*@C
588   PetscQuadratureExpandComposite - Return a quadrature over the composite element, which has the original quadrature in each subelement
589 
590   Not Collective; No Fortran Support
591 
592   Input Parameters:
593 + q - The original `PetscQuadrature`
594 . numSubelements - The number of subelements the original element is divided into
595 . v0 - An array of the initial points for each subelement
596 - jac - An array of the Jacobian mappings from the reference to each subelement
597 
598   Output Parameters:
599 . dim - The dimension
600 
601   Level: intermediate
602 
603   Note:
604   Together v0 and jac define an affine mapping from the original reference element to each subelement
605 
606 .seealso: `PetscQuadrature`, `PetscFECreate()`, `PetscSpaceGetDimension()`, `PetscDualSpaceGetDimension()`
607 @*/
608 PetscErrorCode PetscQuadratureExpandComposite(PetscQuadrature q, PetscInt numSubelements, const PetscReal v0[], const PetscReal jac[], PetscQuadrature *qref)
609 {
610   const PetscReal *points, *weights;
611   PetscReal       *pointsRef, *weightsRef;
612   PetscInt         dim, Nc, order, npoints, npointsRef, c, p, cp, d, e;
613 
614   PetscFunctionBegin;
615   PetscValidHeaderSpecific(q, PETSCQUADRATURE_CLASSID, 1);
616   PetscValidRealPointer(v0, 3);
617   PetscValidRealPointer(jac, 4);
618   PetscValidPointer(qref, 5);
619   PetscCall(PetscQuadratureCreate(PETSC_COMM_SELF, qref));
620   PetscCall(PetscQuadratureGetOrder(q, &order));
621   PetscCall(PetscQuadratureGetData(q, &dim, &Nc, &npoints, &points, &weights));
622   npointsRef = npoints * numSubelements;
623   PetscCall(PetscMalloc1(npointsRef * dim, &pointsRef));
624   PetscCall(PetscMalloc1(npointsRef * Nc, &weightsRef));
625   for (c = 0; c < numSubelements; ++c) {
626     for (p = 0; p < npoints; ++p) {
627       for (d = 0; d < dim; ++d) {
628         pointsRef[(c * npoints + p) * dim + d] = v0[c * dim + d];
629         for (e = 0; e < dim; ++e) pointsRef[(c * npoints + p) * dim + d] += jac[(c * dim + d) * dim + e] * (points[p * dim + e] + 1.0);
630       }
631       /* Could also use detJ here */
632       for (cp = 0; cp < Nc; ++cp) weightsRef[(c * npoints + p) * Nc + cp] = weights[p * Nc + cp] / numSubelements;
633     }
634   }
635   PetscCall(PetscQuadratureSetOrder(*qref, order));
636   PetscCall(PetscQuadratureSetData(*qref, dim, Nc, npointsRef, pointsRef, weightsRef));
637   PetscFunctionReturn(PETSC_SUCCESS);
638 }
639 
640 /* Compute the coefficients for the Jacobi polynomial recurrence,
641  *
642  * J^{a,b}_n(x) = (cnm1 + cnm1x * x) * J^{a,b}_{n-1}(x) - cnm2 * J^{a,b}_{n-2}(x).
643  */
644 #define PetscDTJacobiRecurrence_Internal(n, a, b, cnm1, cnm1x, cnm2) \
645   do { \
646     PetscReal _a = (a); \
647     PetscReal _b = (b); \
648     PetscReal _n = (n); \
649     if (n == 1) { \
650       (cnm1)  = (_a - _b) * 0.5; \
651       (cnm1x) = (_a + _b + 2.) * 0.5; \
652       (cnm2)  = 0.; \
653     } else { \
654       PetscReal _2n  = _n + _n; \
655       PetscReal _d   = (_2n * (_n + _a + _b) * (_2n + _a + _b - 2)); \
656       PetscReal _n1  = (_2n + _a + _b - 1.) * (_a * _a - _b * _b); \
657       PetscReal _n1x = (_2n + _a + _b - 1.) * (_2n + _a + _b) * (_2n + _a + _b - 2); \
658       PetscReal _n2  = 2. * ((_n + _a - 1.) * (_n + _b - 1.) * (_2n + _a + _b)); \
659       (cnm1)         = _n1 / _d; \
660       (cnm1x)        = _n1x / _d; \
661       (cnm2)         = _n2 / _d; \
662     } \
663   } while (0)
664 
665 /*@
666   PetscDTJacobiNorm - Compute the weighted L2 norm of a Jacobi polynomial.
667 
668   $\| P^{\alpha,\beta}_n \|_{\alpha,\beta}^2 = \int_{-1}^1 (1 + x)^{\alpha} (1 - x)^{\beta} P^{\alpha,\beta}_n (x)^2 dx.$
669 
670   Input Parameters:
671 - alpha - the left exponent > -1
672 . beta - the right exponent > -1
673 + n - the polynomial degree
674 
675   Output Parameter:
676 . norm - the weighted L2 norm
677 
678   Level: beginner
679 
680 .seealso: `PetscQuadrature`, `PetscDTJacobiEval()`
681 @*/
682 PetscErrorCode PetscDTJacobiNorm(PetscReal alpha, PetscReal beta, PetscInt n, PetscReal *norm)
683 {
684   PetscReal twoab1;
685   PetscReal gr;
686 
687   PetscFunctionBegin;
688   PetscCheck(alpha > -1., PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Exponent alpha %g <= -1. invalid", (double)alpha);
689   PetscCheck(beta > -1., PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Exponent beta %g <= -1. invalid", (double)beta);
690   PetscCheck(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "n %" PetscInt_FMT " < 0 invalid", n);
691   twoab1 = PetscPowReal(2., alpha + beta + 1.);
692 #if defined(PETSC_HAVE_LGAMMA)
693   if (!n) {
694     gr = PetscExpReal(PetscLGamma(alpha + 1.) + PetscLGamma(beta + 1.) - PetscLGamma(alpha + beta + 2.));
695   } else {
696     gr = PetscExpReal(PetscLGamma(n + alpha + 1.) + PetscLGamma(n + beta + 1.) - (PetscLGamma(n + 1.) + PetscLGamma(n + alpha + beta + 1.))) / (n + n + alpha + beta + 1.);
697   }
698 #else
699   {
700     PetscInt alphai = (PetscInt)alpha;
701     PetscInt betai  = (PetscInt)beta;
702     PetscInt i;
703 
704     gr = n ? (1. / (n + n + alpha + beta + 1.)) : 1.;
705     if ((PetscReal)alphai == alpha) {
706       if (!n) {
707         for (i = 0; i < alphai; i++) gr *= (i + 1.) / (beta + i + 1.);
708         gr /= (alpha + beta + 1.);
709       } else {
710         for (i = 0; i < alphai; i++) gr *= (n + i + 1.) / (n + beta + i + 1.);
711       }
712     } else if ((PetscReal)betai == beta) {
713       if (!n) {
714         for (i = 0; i < betai; i++) gr *= (i + 1.) / (alpha + i + 2.);
715         gr /= (alpha + beta + 1.);
716       } else {
717         for (i = 0; i < betai; i++) gr *= (n + i + 1.) / (n + alpha + i + 1.);
718       }
719     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "lgamma() - math routine is unavailable.");
720   }
721 #endif
722   *norm = PetscSqrtReal(twoab1 * gr);
723   PetscFunctionReturn(PETSC_SUCCESS);
724 }
725 
726 static PetscErrorCode PetscDTJacobiEval_Internal(PetscInt npoints, PetscReal a, PetscReal b, PetscInt k, const PetscReal *points, PetscInt ndegree, const PetscInt *degrees, PetscReal *p)
727 {
728   PetscReal ak, bk;
729   PetscReal abk1;
730   PetscInt  i, l, maxdegree;
731 
732   PetscFunctionBegin;
733   maxdegree = degrees[ndegree - 1] - k;
734   ak        = a + k;
735   bk        = b + k;
736   abk1      = a + b + k + 1.;
737   if (maxdegree < 0) {
738     for (i = 0; i < npoints; i++)
739       for (l = 0; l < ndegree; l++) p[i * ndegree + l] = 0.;
740     PetscFunctionReturn(PETSC_SUCCESS);
741   }
742   for (i = 0; i < npoints; i++) {
743     PetscReal pm1, pm2, x;
744     PetscReal cnm1, cnm1x, cnm2;
745     PetscInt  j, m;
746 
747     x   = points[i];
748     pm2 = 1.;
749     PetscDTJacobiRecurrence_Internal(1, ak, bk, cnm1, cnm1x, cnm2);
750     pm1 = (cnm1 + cnm1x * x);
751     l   = 0;
752     while (l < ndegree && degrees[l] - k < 0) p[l++] = 0.;
753     while (l < ndegree && degrees[l] - k == 0) {
754       p[l] = pm2;
755       for (m = 0; m < k; m++) p[l] *= (abk1 + m) * 0.5;
756       l++;
757     }
758     while (l < ndegree && degrees[l] - k == 1) {
759       p[l] = pm1;
760       for (m = 0; m < k; m++) p[l] *= (abk1 + 1 + m) * 0.5;
761       l++;
762     }
763     for (j = 2; j <= maxdegree; j++) {
764       PetscReal pp;
765 
766       PetscDTJacobiRecurrence_Internal(j, ak, bk, cnm1, cnm1x, cnm2);
767       pp  = (cnm1 + cnm1x * x) * pm1 - cnm2 * pm2;
768       pm2 = pm1;
769       pm1 = pp;
770       while (l < ndegree && degrees[l] - k == j) {
771         p[l] = pp;
772         for (m = 0; m < k; m++) p[l] *= (abk1 + j + m) * 0.5;
773         l++;
774       }
775     }
776     p += ndegree;
777   }
778   PetscFunctionReturn(PETSC_SUCCESS);
779 }
780 
781 /*@
782   PetscDTJacobiEvalJet - Evaluate the jet (function and derivatives) of the Jacobi polynomials basis up to a given degree.
783   The Jacobi polynomials with indices $\alpha$ and $\beta$ are orthogonal with respect to the weighted inner product
784   $\langle f, g \rangle = \int_{-1}^1 (1+x)^{\alpha} (1-x)^{\beta} f(x) g(x) dx$.
785 
786   Input Parameters:
787 + alpha - the left exponent of the weight
788 . beta - the right exponetn of the weight
789 . npoints - the number of points to evaluate the polynomials at
790 . points - [npoints] array of point coordinates
791 . degree - the maximm degree polynomial space to evaluate, (degree + 1) will be evaluated total.
792 - k - the maximum derivative to evaluate in the jet, (k + 1) will be evaluated total.
793 
794   Output Parameters:
795 - p - an array containing the evaluations of the Jacobi polynomials's jets on the points.  the size is (degree + 1) x
796   (k + 1) x npoints, which also describes the order of the dimensions of this three-dimensional array: the first
797   (slowest varying) dimension is polynomial degree; the second dimension is derivative order; the third (fastest
798   varying) dimension is the index of the evaluation point.
799 
800   Level: advanced
801 
802 .seealso: `PetscDTJacobiEval()`, `PetscDTPKDEvalJet()`
803 @*/
804 PetscErrorCode PetscDTJacobiEvalJet(PetscReal alpha, PetscReal beta, PetscInt npoints, const PetscReal points[], PetscInt degree, PetscInt k, PetscReal p[])
805 {
806   PetscInt   i, j, l;
807   PetscInt  *degrees;
808   PetscReal *psingle;
809 
810   PetscFunctionBegin;
811   if (degree == 0) {
812     PetscInt zero = 0;
813 
814     for (i = 0; i <= k; i++) PetscCall(PetscDTJacobiEval_Internal(npoints, alpha, beta, i, points, 1, &zero, &p[i * npoints]));
815     PetscFunctionReturn(PETSC_SUCCESS);
816   }
817   PetscCall(PetscMalloc1(degree + 1, &degrees));
818   PetscCall(PetscMalloc1((degree + 1) * npoints, &psingle));
819   for (i = 0; i <= degree; i++) degrees[i] = i;
820   for (i = 0; i <= k; i++) {
821     PetscCall(PetscDTJacobiEval_Internal(npoints, alpha, beta, i, points, degree + 1, degrees, psingle));
822     for (j = 0; j <= degree; j++) {
823       for (l = 0; l < npoints; l++) p[(j * (k + 1) + i) * npoints + l] = psingle[l * (degree + 1) + j];
824     }
825   }
826   PetscCall(PetscFree(psingle));
827   PetscCall(PetscFree(degrees));
828   PetscFunctionReturn(PETSC_SUCCESS);
829 }
830 
831 /*@
832    PetscDTJacobiEval - evaluate Jacobi polynomials for the weight function $(1.+x)^{\alpha} (1.-x)^{\beta}$ at a set of points
833                        at points
834 
835    Not Collective
836 
837    Input Parameters:
838 +  npoints - number of spatial points to evaluate at
839 .  alpha - the left exponent > -1
840 .  beta - the right exponent > -1
841 .  points - array of locations to evaluate at
842 .  ndegree - number of basis degrees to evaluate
843 -  degrees - sorted array of degrees to evaluate
844 
845    Output Parameters:
846 +  B - row-oriented basis evaluation matrix B[point*ndegree + degree] (dimension npoints*ndegrees, allocated by caller) (or NULL)
847 .  D - row-oriented derivative evaluation matrix (or NULL)
848 -  D2 - row-oriented second derivative evaluation matrix (or NULL)
849 
850    Level: intermediate
851 
852 .seealso: `PetscDTGaussQuadrature()`, `PetscDTLegendreEval()`
853 @*/
854 PetscErrorCode PetscDTJacobiEval(PetscInt npoints, PetscReal alpha, PetscReal beta, const PetscReal *points, PetscInt ndegree, const PetscInt *degrees, PetscReal *B, PetscReal *D, PetscReal *D2)
855 {
856   PetscFunctionBegin;
857   PetscCheck(alpha > -1., PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "alpha must be > -1.");
858   PetscCheck(beta > -1., PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "beta must be > -1.");
859   if (!npoints || !ndegree) PetscFunctionReturn(PETSC_SUCCESS);
860   if (B) PetscCall(PetscDTJacobiEval_Internal(npoints, alpha, beta, 0, points, ndegree, degrees, B));
861   if (D) PetscCall(PetscDTJacobiEval_Internal(npoints, alpha, beta, 1, points, ndegree, degrees, D));
862   if (D2) PetscCall(PetscDTJacobiEval_Internal(npoints, alpha, beta, 2, points, ndegree, degrees, D2));
863   PetscFunctionReturn(PETSC_SUCCESS);
864 }
865 
866 /*@
867    PetscDTLegendreEval - evaluate Legendre polynomials at points
868 
869    Not Collective
870 
871    Input Parameters:
872 +  npoints - number of spatial points to evaluate at
873 .  points - array of locations to evaluate at
874 .  ndegree - number of basis degrees to evaluate
875 -  degrees - sorted array of degrees to evaluate
876 
877    Output Parameters:
878 +  B - row-oriented basis evaluation matrix B[point*ndegree + degree] (dimension npoints*ndegrees, allocated by caller) (or NULL)
879 .  D - row-oriented derivative evaluation matrix (or NULL)
880 -  D2 - row-oriented second derivative evaluation matrix (or NULL)
881 
882    Level: intermediate
883 
884 .seealso: `PetscDTGaussQuadrature()`
885 @*/
886 PetscErrorCode PetscDTLegendreEval(PetscInt npoints, const PetscReal *points, PetscInt ndegree, const PetscInt *degrees, PetscReal *B, PetscReal *D, PetscReal *D2)
887 {
888   PetscFunctionBegin;
889   PetscCall(PetscDTJacobiEval(npoints, 0., 0., points, ndegree, degrees, B, D, D2));
890   PetscFunctionReturn(PETSC_SUCCESS);
891 }
892 
893 /*@
894   PetscDTIndexToGradedOrder - convert an index into a tuple of monomial degrees in a graded order (that is, if the degree sum of tuple x is less than the degree sum of tuple y, then the index of x is smaller than the index of y)
895 
896   Input Parameters:
897 + len - the desired length of the degree tuple
898 - index - the index to convert: should be >= 0
899 
900   Output Parameter:
901 . degtup - will be filled with a tuple of degrees
902 
903   Level: beginner
904 
905   Note:
906   For two tuples x and y with the same degree sum, partial degree sums over the final elements of the tuples
907   acts as a tiebreaker.  For example, (2, 1, 1) and (1, 2, 1) have the same degree sum, but the degree sum over the
908   last two elements is smaller for the former, so (2, 1, 1) < (1, 2, 1).
909 
910 .seealso: `PetscDTGradedOrderToIndex()`
911 @*/
912 PetscErrorCode PetscDTIndexToGradedOrder(PetscInt len, PetscInt index, PetscInt degtup[])
913 {
914   PetscInt i, total;
915   PetscInt sum;
916 
917   PetscFunctionBeginHot;
918   PetscCheck(len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "length must be non-negative");
919   PetscCheck(index >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "index must be non-negative");
920   total = 1;
921   sum   = 0;
922   while (index >= total) {
923     index -= total;
924     total = (total * (len + sum)) / (sum + 1);
925     sum++;
926   }
927   for (i = 0; i < len; i++) {
928     PetscInt c;
929 
930     degtup[i] = sum;
931     for (c = 0, total = 1; c < sum; c++) {
932       /* going into the loop, total is the number of way to have a tuple of sum exactly c with length len - 1 - i */
933       if (index < total) break;
934       index -= total;
935       total = (total * (len - 1 - i + c)) / (c + 1);
936       degtup[i]--;
937     }
938     sum -= degtup[i];
939   }
940   PetscFunctionReturn(PETSC_SUCCESS);
941 }
942 
943 /*@
944   PetscDTGradedOrderToIndex - convert a tuple into an index in a graded order, the inverse of `PetscDTIndexToGradedOrder()`.
945 
946   Input Parameters:
947 + len - the length of the degree tuple
948 - degtup - tuple with this length
949 
950   Output Parameter:
951 . index - index in graded order: >= 0
952 
953   Level: Beginner
954 
955   Note:
956   For two tuples x and y with the same degree sum, partial degree sums over the final elements of the tuples
957   acts as a tiebreaker.  For example, (2, 1, 1) and (1, 2, 1) have the same degree sum, but the degree sum over the
958   last two elements is smaller for the former, so (2, 1, 1) < (1, 2, 1).
959 
960 .seealso: `PetscDTIndexToGradedOrder()`
961 @*/
962 PetscErrorCode PetscDTGradedOrderToIndex(PetscInt len, const PetscInt degtup[], PetscInt *index)
963 {
964   PetscInt i, idx, sum, total;
965 
966   PetscFunctionBeginHot;
967   PetscCheck(len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "length must be non-negative");
968   for (i = 0, sum = 0; i < len; i++) sum += degtup[i];
969   idx   = 0;
970   total = 1;
971   for (i = 0; i < sum; i++) {
972     idx += total;
973     total = (total * (len + i)) / (i + 1);
974   }
975   for (i = 0; i < len - 1; i++) {
976     PetscInt c;
977 
978     total = 1;
979     sum -= degtup[i];
980     for (c = 0; c < sum; c++) {
981       idx += total;
982       total = (total * (len - 1 - i + c)) / (c + 1);
983     }
984   }
985   *index = idx;
986   PetscFunctionReturn(PETSC_SUCCESS);
987 }
988 
989 static PetscBool PKDCite       = PETSC_FALSE;
990 const char       PKDCitation[] = "@article{Kirby2010,\n"
991                                  "  title={Singularity-free evaluation of collapsed-coordinate orthogonal polynomials},\n"
992                                  "  author={Kirby, Robert C},\n"
993                                  "  journal={ACM Transactions on Mathematical Software (TOMS)},\n"
994                                  "  volume={37},\n"
995                                  "  number={1},\n"
996                                  "  pages={1--16},\n"
997                                  "  year={2010},\n"
998                                  "  publisher={ACM New York, NY, USA}\n}\n";
999 
1000 /*@
1001   PetscDTPKDEvalJet - Evaluate the jet (function and derivatives) of the Proriol-Koornwinder-Dubiner (PKD) basis for
1002   the space of polynomials up to a given degree.  The PKD basis is L2-orthonormal on the biunit simplex (which is used
1003   as the reference element for finite elements in PETSc), which makes it a stable basis to use for evaluating
1004   polynomials in that domain.
1005 
1006   Input Parameters:
1007 + dim - the number of variables in the multivariate polynomials
1008 . npoints - the number of points to evaluate the polynomials at
1009 . points - [npoints x dim] array of point coordinates
1010 . degree - the degree (sum of degrees on the variables in a monomial) of the polynomial space to evaluate.  There are ((dim + degree) choose dim) polynomials in this space.
1011 - k - the maximum order partial derivative to evaluate in the jet.  There are (dim + k choose dim) partial derivatives
1012   in the jet.  Choosing k = 0 means to evaluate just the function and no derivatives
1013 
1014   Output Parameters:
1015 - p - an array containing the evaluations of the PKD polynomials' jets on the points.  The size is ((dim + degree)
1016   choose dim) x ((dim + k) choose dim) x npoints, which also describes the order of the dimensions of this
1017   three-dimensional array: the first (slowest varying) dimension is basis function index; the second dimension is jet
1018   index; the third (fastest varying) dimension is the index of the evaluation point.
1019 
1020   Level: advanced
1021 
1022   Notes:
1023   The ordering of the basis functions, and the ordering of the derivatives in the jet, both follow the graded
1024   ordering of `PetscDTIndexToGradedOrder()` and `PetscDTGradedOrderToIndex()`.  For example, in 3D, the polynomial with
1025   leading monomial x^2,y^0,z^1, which has degree tuple (2,0,1), which by `PetscDTGradedOrderToIndex()` has index 12 (it is the 13th basis function in the space);
1026   the partial derivative $\partial_x \partial_z$ has order tuple (1,0,1), appears at index 6 in the jet (it is the 7th partial derivative in the jet).
1027 
1028   The implementation uses Kirby's singularity-free evaluation algorithm, https://doi.org/10.1145/1644001.1644006.
1029 
1030 .seealso: `PetscDTGradedOrderToIndex()`, `PetscDTIndexToGradedOrder()`, `PetscDTJacobiEvalJet()`
1031 @*/
1032 PetscErrorCode PetscDTPKDEvalJet(PetscInt dim, PetscInt npoints, const PetscReal points[], PetscInt degree, PetscInt k, PetscReal p[])
1033 {
1034   PetscInt   degidx, kidx, d, pt;
1035   PetscInt   Nk, Ndeg;
1036   PetscInt  *ktup, *degtup;
1037   PetscReal *scales, initscale, scaleexp;
1038 
1039   PetscFunctionBegin;
1040   PetscCall(PetscCitationsRegister(PKDCitation, &PKDCite));
1041   PetscCall(PetscDTBinomialInt(dim + k, k, &Nk));
1042   PetscCall(PetscDTBinomialInt(degree + dim, degree, &Ndeg));
1043   PetscCall(PetscMalloc2(dim, &degtup, dim, &ktup));
1044   PetscCall(PetscMalloc1(Ndeg, &scales));
1045   initscale = 1.;
1046   if (dim > 1) {
1047     PetscCall(PetscDTBinomial(dim, 2, &scaleexp));
1048     initscale = PetscPowReal(2., scaleexp * 0.5);
1049   }
1050   for (degidx = 0; degidx < Ndeg; degidx++) {
1051     PetscInt  e, i;
1052     PetscInt  m1idx = -1, m2idx = -1;
1053     PetscInt  n;
1054     PetscInt  degsum;
1055     PetscReal alpha;
1056     PetscReal cnm1, cnm1x, cnm2;
1057     PetscReal norm;
1058 
1059     PetscCall(PetscDTIndexToGradedOrder(dim, degidx, degtup));
1060     for (d = dim - 1; d >= 0; d--)
1061       if (degtup[d]) break;
1062     if (d < 0) { /* constant is 1 everywhere, all derivatives are zero */
1063       scales[degidx] = initscale;
1064       for (e = 0; e < dim; e++) {
1065         PetscCall(PetscDTJacobiNorm(e, 0., 0, &norm));
1066         scales[degidx] /= norm;
1067       }
1068       for (i = 0; i < npoints; i++) p[degidx * Nk * npoints + i] = 1.;
1069       for (i = 0; i < (Nk - 1) * npoints; i++) p[(degidx * Nk + 1) * npoints + i] = 0.;
1070       continue;
1071     }
1072     n = degtup[d];
1073     degtup[d]--;
1074     PetscCall(PetscDTGradedOrderToIndex(dim, degtup, &m1idx));
1075     if (degtup[d] > 0) {
1076       degtup[d]--;
1077       PetscCall(PetscDTGradedOrderToIndex(dim, degtup, &m2idx));
1078       degtup[d]++;
1079     }
1080     degtup[d]++;
1081     for (e = 0, degsum = 0; e < d; e++) degsum += degtup[e];
1082     alpha = 2 * degsum + d;
1083     PetscDTJacobiRecurrence_Internal(n, alpha, 0., cnm1, cnm1x, cnm2);
1084 
1085     scales[degidx] = initscale;
1086     for (e = 0, degsum = 0; e < dim; e++) {
1087       PetscInt  f;
1088       PetscReal ealpha;
1089       PetscReal enorm;
1090 
1091       ealpha = 2 * degsum + e;
1092       for (f = 0; f < degsum; f++) scales[degidx] *= 2.;
1093       PetscCall(PetscDTJacobiNorm(ealpha, 0., degtup[e], &enorm));
1094       scales[degidx] /= enorm;
1095       degsum += degtup[e];
1096     }
1097 
1098     for (pt = 0; pt < npoints; pt++) {
1099       /* compute the multipliers */
1100       PetscReal thetanm1, thetanm1x, thetanm2;
1101 
1102       thetanm1x = dim - (d + 1) + 2. * points[pt * dim + d];
1103       for (e = d + 1; e < dim; e++) thetanm1x += points[pt * dim + e];
1104       thetanm1x *= 0.5;
1105       thetanm1 = (2. - (dim - (d + 1)));
1106       for (e = d + 1; e < dim; e++) thetanm1 -= points[pt * dim + e];
1107       thetanm1 *= 0.5;
1108       thetanm2 = thetanm1 * thetanm1;
1109 
1110       for (kidx = 0; kidx < Nk; kidx++) {
1111         PetscInt f;
1112 
1113         PetscCall(PetscDTIndexToGradedOrder(dim, kidx, ktup));
1114         /* first sum in the same derivative terms */
1115         p[(degidx * Nk + kidx) * npoints + pt] = (cnm1 * thetanm1 + cnm1x * thetanm1x) * p[(m1idx * Nk + kidx) * npoints + pt];
1116         if (m2idx >= 0) p[(degidx * Nk + kidx) * npoints + pt] -= cnm2 * thetanm2 * p[(m2idx * Nk + kidx) * npoints + pt];
1117 
1118         for (f = d; f < dim; f++) {
1119           PetscInt km1idx, mplty = ktup[f];
1120 
1121           if (!mplty) continue;
1122           ktup[f]--;
1123           PetscCall(PetscDTGradedOrderToIndex(dim, ktup, &km1idx));
1124 
1125           /* the derivative of  cnm1x * thetanm1x  wrt x variable f is 0.5 * cnm1x if f > d otherwise it is cnm1x */
1126           /* the derivative of  cnm1  * thetanm1   wrt x variable f is 0 if f == d, otherwise it is -0.5 * cnm1 */
1127           /* the derivative of -cnm2  * thetanm2   wrt x variable f is 0 if f == d, otherwise it is cnm2 * thetanm1 */
1128           if (f > d) {
1129             PetscInt f2;
1130 
1131             p[(degidx * Nk + kidx) * npoints + pt] += mplty * 0.5 * (cnm1x - cnm1) * p[(m1idx * Nk + km1idx) * npoints + pt];
1132             if (m2idx >= 0) {
1133               p[(degidx * Nk + kidx) * npoints + pt] += mplty * cnm2 * thetanm1 * p[(m2idx * Nk + km1idx) * npoints + pt];
1134               /* second derivatives of -cnm2  * thetanm2   wrt x variable f,f2 is like - 0.5 * cnm2 */
1135               for (f2 = f; f2 < dim; f2++) {
1136                 PetscInt km2idx, mplty2 = ktup[f2];
1137                 PetscInt factor;
1138 
1139                 if (!mplty2) continue;
1140                 ktup[f2]--;
1141                 PetscCall(PetscDTGradedOrderToIndex(dim, ktup, &km2idx));
1142 
1143                 factor = mplty * mplty2;
1144                 if (f == f2) factor /= 2;
1145                 p[(degidx * Nk + kidx) * npoints + pt] -= 0.5 * factor * cnm2 * p[(m2idx * Nk + km2idx) * npoints + pt];
1146                 ktup[f2]++;
1147               }
1148             }
1149           } else {
1150             p[(degidx * Nk + kidx) * npoints + pt] += mplty * cnm1x * p[(m1idx * Nk + km1idx) * npoints + pt];
1151           }
1152           ktup[f]++;
1153         }
1154       }
1155     }
1156   }
1157   for (degidx = 0; degidx < Ndeg; degidx++) {
1158     PetscReal scale = scales[degidx];
1159     PetscInt  i;
1160 
1161     for (i = 0; i < Nk * npoints; i++) p[degidx * Nk * npoints + i] *= scale;
1162   }
1163   PetscCall(PetscFree(scales));
1164   PetscCall(PetscFree2(degtup, ktup));
1165   PetscFunctionReturn(PETSC_SUCCESS);
1166 }
1167 
1168 /*@
1169   PetscDTPTrimmedSize - The size of the trimmed polynomial space of k-forms with a given degree and form degree,
1170   which can be evaluated in `PetscDTPTrimmedEvalJet()`.
1171 
1172   Input Parameters:
1173 + dim - the number of variables in the multivariate polynomials
1174 . degree - the degree (sum of degrees on the variables in a monomial) of the trimmed polynomial space.
1175 - formDegree - the degree of the form
1176 
1177   Output Parameters:
1178 - size - The number ((`dim` + `degree`) choose (`dim` + `formDegree`)) x ((`degree` + `formDegree` - 1) choose (`formDegree`))
1179 
1180   Level: advanced
1181 
1182 .seealso: `PetscDTPTrimmedEvalJet()`
1183 @*/
1184 PetscErrorCode PetscDTPTrimmedSize(PetscInt dim, PetscInt degree, PetscInt formDegree, PetscInt *size)
1185 {
1186   PetscInt Nrk, Nbpt; // number of trimmed polynomials
1187 
1188   PetscFunctionBegin;
1189   formDegree = PetscAbsInt(formDegree);
1190   PetscCall(PetscDTBinomialInt(degree + dim, degree + formDegree, &Nbpt));
1191   PetscCall(PetscDTBinomialInt(degree + formDegree - 1, formDegree, &Nrk));
1192   Nbpt *= Nrk;
1193   *size = Nbpt;
1194   PetscFunctionReturn(PETSC_SUCCESS);
1195 }
1196 
1197 /* there was a reference implementation based on section 4.4 of Arnold, Falk & Winther (acta numerica, 2006), but it
1198  * was inferior to this implementation */
1199 static PetscErrorCode PetscDTPTrimmedEvalJet_Internal(PetscInt dim, PetscInt npoints, const PetscReal points[], PetscInt degree, PetscInt formDegree, PetscInt jetDegree, PetscReal p[])
1200 {
1201   PetscInt  formDegreeOrig = formDegree;
1202   PetscBool formNegative   = (formDegreeOrig < 0) ? PETSC_TRUE : PETSC_FALSE;
1203 
1204   PetscFunctionBegin;
1205   formDegree = PetscAbsInt(formDegreeOrig);
1206   if (formDegree == 0) {
1207     PetscCall(PetscDTPKDEvalJet(dim, npoints, points, degree, jetDegree, p));
1208     PetscFunctionReturn(PETSC_SUCCESS);
1209   }
1210   if (formDegree == dim) {
1211     PetscCall(PetscDTPKDEvalJet(dim, npoints, points, degree - 1, jetDegree, p));
1212     PetscFunctionReturn(PETSC_SUCCESS);
1213   }
1214   PetscInt Nbpt;
1215   PetscCall(PetscDTPTrimmedSize(dim, degree, formDegree, &Nbpt));
1216   PetscInt Nf;
1217   PetscCall(PetscDTBinomialInt(dim, formDegree, &Nf));
1218   PetscInt Nk;
1219   PetscCall(PetscDTBinomialInt(dim + jetDegree, dim, &Nk));
1220   PetscCall(PetscArrayzero(p, Nbpt * Nf * Nk * npoints));
1221 
1222   PetscInt Nbpm1; // number of scalar polynomials up to degree - 1;
1223   PetscCall(PetscDTBinomialInt(dim + degree - 1, dim, &Nbpm1));
1224   PetscReal *p_scalar;
1225   PetscCall(PetscMalloc1(Nbpm1 * Nk * npoints, &p_scalar));
1226   PetscCall(PetscDTPKDEvalJet(dim, npoints, points, degree - 1, jetDegree, p_scalar));
1227   PetscInt total = 0;
1228   // First add the full polynomials up to degree - 1 into the basis: take the scalar
1229   // and copy one for each form component
1230   for (PetscInt i = 0; i < Nbpm1; i++) {
1231     const PetscReal *src = &p_scalar[i * Nk * npoints];
1232     for (PetscInt f = 0; f < Nf; f++) {
1233       PetscReal *dest = &p[(total++ * Nf + f) * Nk * npoints];
1234       PetscCall(PetscArraycpy(dest, src, Nk * npoints));
1235     }
1236   }
1237   PetscInt *form_atoms;
1238   PetscCall(PetscMalloc1(formDegree + 1, &form_atoms));
1239   // construct the interior product pattern
1240   PetscInt(*pattern)[3];
1241   PetscInt Nf1; // number of formDegree + 1 forms
1242   PetscCall(PetscDTBinomialInt(dim, formDegree + 1, &Nf1));
1243   PetscInt nnz = Nf1 * (formDegree + 1);
1244   PetscCall(PetscMalloc1(Nf1 * (formDegree + 1), &pattern));
1245   PetscCall(PetscDTAltVInteriorPattern(dim, formDegree + 1, pattern));
1246   PetscReal centroid = (1. - dim) / (dim + 1.);
1247   PetscInt *deriv;
1248   PetscCall(PetscMalloc1(dim, &deriv));
1249   for (PetscInt d = dim; d >= formDegree + 1; d--) {
1250     PetscInt Nfd1; // number of formDegree + 1 forms in dimension d that include dx_0
1251                    // (equal to the number of formDegree forms in dimension d-1)
1252     PetscCall(PetscDTBinomialInt(d - 1, formDegree, &Nfd1));
1253     // The number of homogeneous (degree-1) scalar polynomials in d variables
1254     PetscInt Nh;
1255     PetscCall(PetscDTBinomialInt(d - 1 + degree - 1, d - 1, &Nh));
1256     const PetscReal *h_scalar = &p_scalar[(Nbpm1 - Nh) * Nk * npoints];
1257     for (PetscInt b = 0; b < Nh; b++) {
1258       const PetscReal *h_s = &h_scalar[b * Nk * npoints];
1259       for (PetscInt f = 0; f < Nfd1; f++) {
1260         // construct all formDegree+1 forms that start with dx_(dim - d) /\ ...
1261         form_atoms[0] = dim - d;
1262         PetscCall(PetscDTEnumSubset(d - 1, formDegree, f, &form_atoms[1]));
1263         for (PetscInt i = 0; i < formDegree; i++) form_atoms[1 + i] += form_atoms[0] + 1;
1264         PetscInt f_ind; // index of the resulting form
1265         PetscCall(PetscDTSubsetIndex(dim, formDegree + 1, form_atoms, &f_ind));
1266         PetscReal *p_f = &p[total++ * Nf * Nk * npoints];
1267         for (PetscInt nz = 0; nz < nnz; nz++) {
1268           PetscInt  i     = pattern[nz][0]; // formDegree component
1269           PetscInt  j     = pattern[nz][1]; // (formDegree + 1) component
1270           PetscInt  v     = pattern[nz][2]; // coordinate component
1271           PetscReal scale = v < 0 ? -1. : 1.;
1272 
1273           i     = formNegative ? (Nf - 1 - i) : i;
1274           scale = (formNegative && (i & 1)) ? -scale : scale;
1275           v     = v < 0 ? -(v + 1) : v;
1276           if (j != f_ind) continue;
1277           PetscReal *p_i = &p_f[i * Nk * npoints];
1278           for (PetscInt jet = 0; jet < Nk; jet++) {
1279             const PetscReal *h_jet = &h_s[jet * npoints];
1280             PetscReal       *p_jet = &p_i[jet * npoints];
1281 
1282             for (PetscInt pt = 0; pt < npoints; pt++) p_jet[pt] += scale * h_jet[pt] * (points[pt * dim + v] - centroid);
1283             PetscCall(PetscDTIndexToGradedOrder(dim, jet, deriv));
1284             deriv[v]++;
1285             PetscReal mult = deriv[v];
1286             PetscInt  l;
1287             PetscCall(PetscDTGradedOrderToIndex(dim, deriv, &l));
1288             if (l >= Nk) continue;
1289             p_jet = &p_i[l * npoints];
1290             for (PetscInt pt = 0; pt < npoints; pt++) p_jet[pt] += scale * mult * h_jet[pt];
1291             deriv[v]--;
1292           }
1293         }
1294       }
1295     }
1296   }
1297   PetscCheck(total == Nbpt, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrectly counted P trimmed polynomials");
1298   PetscCall(PetscFree(deriv));
1299   PetscCall(PetscFree(pattern));
1300   PetscCall(PetscFree(form_atoms));
1301   PetscCall(PetscFree(p_scalar));
1302   PetscFunctionReturn(PETSC_SUCCESS);
1303 }
1304 
1305 /*@
1306   PetscDTPTrimmedEvalJet - Evaluate the jet (function and derivatives) of a basis of the trimmed polynomial k-forms up to
1307   a given degree.
1308 
1309   Input Parameters:
1310 + dim - the number of variables in the multivariate polynomials
1311 . npoints - the number of points to evaluate the polynomials at
1312 . points - [npoints x dim] array of point coordinates
1313 . degree - the degree (sum of degrees on the variables in a monomial) of the trimmed polynomial space to evaluate.
1314            There are ((dim + degree) choose (dim + formDegree)) x ((degree + formDegree - 1) choose (formDegree)) polynomials in this space.
1315            (You can use `PetscDTPTrimmedSize()` to compute this size.)
1316 . formDegree - the degree of the form
1317 - jetDegree - the maximum order partial derivative to evaluate in the jet.  There are ((dim + jetDegree) choose dim) partial derivatives
1318               in the jet.  Choosing jetDegree = 0 means to evaluate just the function and no derivatives
1319 
1320   Output Parameter:
1321 . p - an array containing the evaluations of the PKD polynomials' jets on the points.  The size is
1322       `PetscDTPTrimmedSize()` x ((dim + formDegree) choose dim) x ((dim + k) choose dim) x npoints,
1323       which also describes the order of the dimensions of this
1324       four-dimensional array:
1325         the first (slowest varying) dimension is basis function index;
1326         the second dimension is component of the form;
1327         the third dimension is jet index;
1328         the fourth (fastest varying) dimension is the index of the evaluation point.
1329 
1330   Level: advanced
1331 
1332   Notes:
1333   The ordering of the basis functions is not graded, so the basis functions are not nested by degree like `PetscDTPKDEvalJet()`.
1334   The basis functions are not an L2-orthonormal basis on any particular domain.
1335 
1336   The implementation is based on the description of the trimmed polynomials up to degree r as
1337   the direct sum of polynomials up to degree (r-1) and the Koszul differential applied to
1338   homogeneous polynomials of degree (r-1).
1339 
1340 .seealso: `PetscDTPKDEvalJet()`, `PetscDTPTrimmedSize()`
1341 @*/
1342 PetscErrorCode PetscDTPTrimmedEvalJet(PetscInt dim, PetscInt npoints, const PetscReal points[], PetscInt degree, PetscInt formDegree, PetscInt jetDegree, PetscReal p[])
1343 {
1344   PetscFunctionBegin;
1345   PetscCall(PetscDTPTrimmedEvalJet_Internal(dim, npoints, points, degree, formDegree, jetDegree, p));
1346   PetscFunctionReturn(PETSC_SUCCESS);
1347 }
1348 
1349 /* solve the symmetric tridiagonal eigenvalue system, writing the eigenvalues into eigs and the eigenvectors into V
1350  * with lds n; diag and subdiag are overwritten */
1351 static PetscErrorCode PetscDTSymmetricTridiagonalEigensolve(PetscInt n, PetscReal diag[], PetscReal subdiag[], PetscReal eigs[], PetscScalar V[])
1352 {
1353   char          jobz   = 'V'; /* eigenvalues and eigenvectors */
1354   char          range  = 'A'; /* all eigenvalues will be found */
1355   PetscReal     VL     = 0.;  /* ignored because range is 'A' */
1356   PetscReal     VU     = 0.;  /* ignored because range is 'A' */
1357   PetscBLASInt  IL     = 0;   /* ignored because range is 'A' */
1358   PetscBLASInt  IU     = 0;   /* ignored because range is 'A' */
1359   PetscReal     abstol = 0.;  /* unused */
1360   PetscBLASInt  bn, bm, ldz;  /* bm will equal bn on exit */
1361   PetscBLASInt *isuppz;
1362   PetscBLASInt  lwork, liwork;
1363   PetscReal     workquery;
1364   PetscBLASInt  iworkquery;
1365   PetscBLASInt *iwork;
1366   PetscBLASInt  info;
1367   PetscReal    *work = NULL;
1368 
1369   PetscFunctionBegin;
1370 #if !defined(PETSCDTGAUSSIANQUADRATURE_EIG)
1371   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "A LAPACK symmetric tridiagonal eigensolver could not be found");
1372 #endif
1373   PetscCall(PetscBLASIntCast(n, &bn));
1374   PetscCall(PetscBLASIntCast(n, &ldz));
1375 #if !defined(PETSC_MISSING_LAPACK_STEGR)
1376   PetscCall(PetscMalloc1(2 * n, &isuppz));
1377   lwork  = -1;
1378   liwork = -1;
1379   PetscCallBLAS("LAPACKstegr", LAPACKstegr_(&jobz, &range, &bn, diag, subdiag, &VL, &VU, &IL, &IU, &abstol, &bm, eigs, V, &ldz, isuppz, &workquery, &lwork, &iworkquery, &liwork, &info));
1380   PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_PLIB, "xSTEGR error");
1381   lwork  = (PetscBLASInt)workquery;
1382   liwork = (PetscBLASInt)iworkquery;
1383   PetscCall(PetscMalloc2(lwork, &work, liwork, &iwork));
1384   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1385   PetscCallBLAS("LAPACKstegr", LAPACKstegr_(&jobz, &range, &bn, diag, subdiag, &VL, &VU, &IL, &IU, &abstol, &bm, eigs, V, &ldz, isuppz, work, &lwork, iwork, &liwork, &info));
1386   PetscCall(PetscFPTrapPop());
1387   PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_PLIB, "xSTEGR error");
1388   PetscCall(PetscFree2(work, iwork));
1389   PetscCall(PetscFree(isuppz));
1390 #elif !defined(PETSC_MISSING_LAPACK_STEQR)
1391   jobz = 'I'; /* Compute eigenvalues and eigenvectors of the
1392                  tridiagonal matrix.  Z is initialized to the identity
1393                  matrix. */
1394   PetscCall(PetscMalloc1(PetscMax(1, 2 * n - 2), &work));
1395   PetscCallBLAS("LAPACKsteqr", LAPACKsteqr_("I", &bn, diag, subdiag, V, &ldz, work, &info));
1396   PetscCall(PetscFPTrapPop());
1397   PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_PLIB, "xSTEQR error");
1398   PetscCall(PetscFree(work));
1399   PetscCall(PetscArraycpy(eigs, diag, n));
1400 #endif
1401   PetscFunctionReturn(PETSC_SUCCESS);
1402 }
1403 
1404 /* Formula for the weights at the endpoints (-1 and 1) of Gauss-Lobatto-Jacobi
1405  * quadrature rules on the interval [-1, 1] */
1406 static PetscErrorCode PetscDTGaussLobattoJacobiEndweights_Internal(PetscInt n, PetscReal alpha, PetscReal beta, PetscReal *leftw, PetscReal *rightw)
1407 {
1408   PetscReal twoab1;
1409   PetscInt  m = n - 2;
1410   PetscReal a = alpha + 1.;
1411   PetscReal b = beta + 1.;
1412   PetscReal gra, grb;
1413 
1414   PetscFunctionBegin;
1415   twoab1 = PetscPowReal(2., a + b - 1.);
1416 #if defined(PETSC_HAVE_LGAMMA)
1417   grb = PetscExpReal(2. * PetscLGamma(b + 1.) + PetscLGamma(m + 1.) + PetscLGamma(m + a + 1.) - (PetscLGamma(m + b + 1) + PetscLGamma(m + a + b + 1.)));
1418   gra = PetscExpReal(2. * PetscLGamma(a + 1.) + PetscLGamma(m + 1.) + PetscLGamma(m + b + 1.) - (PetscLGamma(m + a + 1) + PetscLGamma(m + a + b + 1.)));
1419 #else
1420   {
1421     PetscInt alphai = (PetscInt)alpha;
1422     PetscInt betai  = (PetscInt)beta;
1423 
1424     if ((PetscReal)alphai == alpha && (PetscReal)betai == beta) {
1425       PetscReal binom1, binom2;
1426 
1427       PetscCall(PetscDTBinomial(m + b, b, &binom1));
1428       PetscCall(PetscDTBinomial(m + a + b, b, &binom2));
1429       grb = 1. / (binom1 * binom2);
1430       PetscCall(PetscDTBinomial(m + a, a, &binom1));
1431       PetscCall(PetscDTBinomial(m + a + b, a, &binom2));
1432       gra = 1. / (binom1 * binom2);
1433     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "lgamma() - math routine is unavailable.");
1434   }
1435 #endif
1436   *leftw  = twoab1 * grb / b;
1437   *rightw = twoab1 * gra / a;
1438   PetscFunctionReturn(PETSC_SUCCESS);
1439 }
1440 
1441 /* Evaluates the nth jacobi polynomial with weight parameters a,b at a point x.
1442    Recurrence relations implemented from the pseudocode given in Karniadakis and Sherwin, Appendix B */
1443 static inline PetscErrorCode PetscDTComputeJacobi(PetscReal a, PetscReal b, PetscInt n, PetscReal x, PetscReal *P)
1444 {
1445   PetscReal pn1, pn2;
1446   PetscReal cnm1, cnm1x, cnm2;
1447   PetscInt  k;
1448 
1449   PetscFunctionBegin;
1450   if (!n) {
1451     *P = 1.0;
1452     PetscFunctionReturn(PETSC_SUCCESS);
1453   }
1454   PetscDTJacobiRecurrence_Internal(1, a, b, cnm1, cnm1x, cnm2);
1455   pn2 = 1.;
1456   pn1 = cnm1 + cnm1x * x;
1457   if (n == 1) {
1458     *P = pn1;
1459     PetscFunctionReturn(PETSC_SUCCESS);
1460   }
1461   *P = 0.0;
1462   for (k = 2; k < n + 1; ++k) {
1463     PetscDTJacobiRecurrence_Internal(k, a, b, cnm1, cnm1x, cnm2);
1464 
1465     *P  = (cnm1 + cnm1x * x) * pn1 - cnm2 * pn2;
1466     pn2 = pn1;
1467     pn1 = *P;
1468   }
1469   PetscFunctionReturn(PETSC_SUCCESS);
1470 }
1471 
1472 /* Evaluates the first derivative of P_{n}^{a,b} at a point x. */
1473 static inline PetscErrorCode PetscDTComputeJacobiDerivative(PetscReal a, PetscReal b, PetscInt n, PetscReal x, PetscInt k, PetscReal *P)
1474 {
1475   PetscReal nP;
1476   PetscInt  i;
1477 
1478   PetscFunctionBegin;
1479   *P = 0.0;
1480   if (k > n) PetscFunctionReturn(PETSC_SUCCESS);
1481   PetscCall(PetscDTComputeJacobi(a + k, b + k, n - k, x, &nP));
1482   for (i = 0; i < k; i++) nP *= (a + b + n + 1. + i) * 0.5;
1483   *P = nP;
1484   PetscFunctionReturn(PETSC_SUCCESS);
1485 }
1486 
1487 static PetscErrorCode PetscDTGaussJacobiQuadrature_Newton_Internal(PetscInt npoints, PetscReal a, PetscReal b, PetscReal x[], PetscReal w[])
1488 {
1489   PetscInt  maxIter = 100;
1490   PetscReal eps     = PetscExpReal(0.75 * PetscLogReal(PETSC_MACHINE_EPSILON));
1491   PetscReal a1, a6, gf;
1492   PetscInt  k;
1493 
1494   PetscFunctionBegin;
1495 
1496   a1 = PetscPowReal(2.0, a + b + 1);
1497 #if defined(PETSC_HAVE_LGAMMA)
1498   {
1499     PetscReal a2, a3, a4, a5;
1500     a2 = PetscLGamma(a + npoints + 1);
1501     a3 = PetscLGamma(b + npoints + 1);
1502     a4 = PetscLGamma(a + b + npoints + 1);
1503     a5 = PetscLGamma(npoints + 1);
1504     gf = PetscExpReal(a2 + a3 - (a4 + a5));
1505   }
1506 #else
1507   {
1508     PetscInt ia, ib;
1509 
1510     ia = (PetscInt)a;
1511     ib = (PetscInt)b;
1512     gf = 1.;
1513     if (ia == a && ia >= 0) { /* compute ratio of rising factorals wrt a */
1514       for (k = 0; k < ia; k++) gf *= (npoints + 1. + k) / (npoints + b + 1. + k);
1515     } else if (b == b && ib >= 0) { /* compute ratio of rising factorials wrt b */
1516       for (k = 0; k < ib; k++) gf *= (npoints + 1. + k) / (npoints + a + 1. + k);
1517     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "lgamma() - math routine is unavailable.");
1518   }
1519 #endif
1520 
1521   a6 = a1 * gf;
1522   /* Computes the m roots of P_{m}^{a,b} on [-1,1] by Newton's method with Chebyshev points as initial guesses.
1523    Algorithm implemented from the pseudocode given by Karniadakis and Sherwin and Python in FIAT */
1524   for (k = 0; k < npoints; ++k) {
1525     PetscReal r = PetscCosReal(PETSC_PI * (1. - (4. * k + 3. + 2. * b) / (4. * npoints + 2. * (a + b + 1.)))), dP;
1526     PetscInt  j;
1527 
1528     if (k > 0) r = 0.5 * (r + x[k - 1]);
1529     for (j = 0; j < maxIter; ++j) {
1530       PetscReal s = 0.0, delta, f, fp;
1531       PetscInt  i;
1532 
1533       for (i = 0; i < k; ++i) s = s + 1.0 / (r - x[i]);
1534       PetscCall(PetscDTComputeJacobi(a, b, npoints, r, &f));
1535       PetscCall(PetscDTComputeJacobiDerivative(a, b, npoints, r, 1, &fp));
1536       delta = f / (fp - f * s);
1537       r     = r - delta;
1538       if (PetscAbsReal(delta) < eps) break;
1539     }
1540     x[k] = r;
1541     PetscCall(PetscDTComputeJacobiDerivative(a, b, npoints, x[k], 1, &dP));
1542     w[k] = a6 / (1.0 - PetscSqr(x[k])) / PetscSqr(dP);
1543   }
1544   PetscFunctionReturn(PETSC_SUCCESS);
1545 }
1546 
1547 /* Compute the diagonals of the Jacobi matrix used in Golub & Welsch algorithms for Gauss-Jacobi
1548  * quadrature weight calculations on [-1,1] for exponents (1. + x)^a (1.-x)^b */
1549 static PetscErrorCode PetscDTJacobiMatrix_Internal(PetscInt nPoints, PetscReal a, PetscReal b, PetscReal *d, PetscReal *s)
1550 {
1551   PetscInt i;
1552 
1553   PetscFunctionBegin;
1554   for (i = 0; i < nPoints; i++) {
1555     PetscReal A, B, C;
1556 
1557     PetscDTJacobiRecurrence_Internal(i + 1, a, b, A, B, C);
1558     d[i] = -A / B;
1559     if (i) s[i - 1] *= C / B;
1560     if (i < nPoints - 1) s[i] = 1. / B;
1561   }
1562   PetscFunctionReturn(PETSC_SUCCESS);
1563 }
1564 
1565 static PetscErrorCode PetscDTGaussJacobiQuadrature_GolubWelsch_Internal(PetscInt npoints, PetscReal a, PetscReal b, PetscReal *x, PetscReal *w)
1566 {
1567   PetscReal mu0;
1568   PetscReal ga, gb, gab;
1569   PetscInt  i;
1570 
1571   PetscFunctionBegin;
1572   PetscCall(PetscCitationsRegister(GolubWelschCitation, &GolubWelschCite));
1573 
1574 #if defined(PETSC_HAVE_TGAMMA)
1575   ga  = PetscTGamma(a + 1);
1576   gb  = PetscTGamma(b + 1);
1577   gab = PetscTGamma(a + b + 2);
1578 #else
1579   {
1580     PetscInt ia, ib;
1581 
1582     ia = (PetscInt)a;
1583     ib = (PetscInt)b;
1584     if (ia == a && ib == b && ia + 1 > 0 && ib + 1 > 0 && ia + ib + 2 > 0) { /* All gamma(x) terms are (x-1)! terms */
1585       PetscCall(PetscDTFactorial(ia, &ga));
1586       PetscCall(PetscDTFactorial(ib, &gb));
1587       PetscCall(PetscDTFactorial(ia + ib + 1, &gb));
1588     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "tgamma() - math routine is unavailable.");
1589   }
1590 #endif
1591   mu0 = PetscPowReal(2., a + b + 1.) * ga * gb / gab;
1592 
1593 #if defined(PETSCDTGAUSSIANQUADRATURE_EIG)
1594   {
1595     PetscReal   *diag, *subdiag;
1596     PetscScalar *V;
1597 
1598     PetscCall(PetscMalloc2(npoints, &diag, npoints, &subdiag));
1599     PetscCall(PetscMalloc1(npoints * npoints, &V));
1600     PetscCall(PetscDTJacobiMatrix_Internal(npoints, a, b, diag, subdiag));
1601     for (i = 0; i < npoints - 1; i++) subdiag[i] = PetscSqrtReal(subdiag[i]);
1602     PetscCall(PetscDTSymmetricTridiagonalEigensolve(npoints, diag, subdiag, x, V));
1603     for (i = 0; i < npoints; i++) w[i] = PetscSqr(PetscRealPart(V[i * npoints])) * mu0;
1604     PetscCall(PetscFree(V));
1605     PetscCall(PetscFree2(diag, subdiag));
1606   }
1607 #else
1608   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "A LAPACK symmetric tridiagonal eigensolver could not be found");
1609 #endif
1610   { /* As of March 2, 2020, The Sun Performance Library breaks the LAPACK contract for xstegr and xsteqr: the
1611        eigenvalues are not guaranteed to be in ascending order.  So we heave a passive aggressive sigh and check that
1612        the eigenvalues are sorted */
1613     PetscBool sorted;
1614 
1615     PetscCall(PetscSortedReal(npoints, x, &sorted));
1616     if (!sorted) {
1617       PetscInt  *order, i;
1618       PetscReal *tmp;
1619 
1620       PetscCall(PetscMalloc2(npoints, &order, npoints, &tmp));
1621       for (i = 0; i < npoints; i++) order[i] = i;
1622       PetscCall(PetscSortRealWithPermutation(npoints, x, order));
1623       PetscCall(PetscArraycpy(tmp, x, npoints));
1624       for (i = 0; i < npoints; i++) x[i] = tmp[order[i]];
1625       PetscCall(PetscArraycpy(tmp, w, npoints));
1626       for (i = 0; i < npoints; i++) w[i] = tmp[order[i]];
1627       PetscCall(PetscFree2(order, tmp));
1628     }
1629   }
1630   PetscFunctionReturn(PETSC_SUCCESS);
1631 }
1632 
1633 static PetscErrorCode PetscDTGaussJacobiQuadrature_Internal(PetscInt npoints, PetscReal alpha, PetscReal beta, PetscReal x[], PetscReal w[], PetscBool newton)
1634 {
1635   PetscFunctionBegin;
1636   PetscCheck(npoints >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of points must be positive");
1637   /* If asking for a 1D Lobatto point, just return the non-Lobatto 1D point */
1638   PetscCheck(alpha > -1., PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "alpha must be > -1.");
1639   PetscCheck(beta > -1., PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "beta must be > -1.");
1640 
1641   if (newton) PetscCall(PetscDTGaussJacobiQuadrature_Newton_Internal(npoints, alpha, beta, x, w));
1642   else PetscCall(PetscDTGaussJacobiQuadrature_GolubWelsch_Internal(npoints, alpha, beta, x, w));
1643   if (alpha == beta) { /* symmetrize */
1644     PetscInt i;
1645     for (i = 0; i < (npoints + 1) / 2; i++) {
1646       PetscInt  j  = npoints - 1 - i;
1647       PetscReal xi = x[i];
1648       PetscReal xj = x[j];
1649       PetscReal wi = w[i];
1650       PetscReal wj = w[j];
1651 
1652       x[i] = (xi - xj) / 2.;
1653       x[j] = (xj - xi) / 2.;
1654       w[i] = w[j] = (wi + wj) / 2.;
1655     }
1656   }
1657   PetscFunctionReturn(PETSC_SUCCESS);
1658 }
1659 
1660 /*@
1661   PetscDTGaussJacobiQuadrature - quadrature for the interval [a, b] with the weight function
1662   $(x-a)^\alpha (x-b)^\beta$.
1663 
1664   Not Collective
1665 
1666   Input Parameters:
1667 + npoints - the number of points in the quadrature rule
1668 . a - the left endpoint of the interval
1669 . b - the right endpoint of the interval
1670 . alpha - the left exponent
1671 - beta - the right exponent
1672 
1673   Output Parameters:
1674 + x - array of length `npoints`, the locations of the quadrature points
1675 - w - array of length `npoints`, the weights of the quadrature points
1676 
1677   Level: intermediate
1678 
1679   Note:
1680   This quadrature rule is exact for polynomials up to degree 2*npoints - 1.
1681 
1682 .seealso: `PetscDTGaussQuadrature()`
1683 @*/
1684 PetscErrorCode PetscDTGaussJacobiQuadrature(PetscInt npoints, PetscReal a, PetscReal b, PetscReal alpha, PetscReal beta, PetscReal x[], PetscReal w[])
1685 {
1686   PetscInt i;
1687 
1688   PetscFunctionBegin;
1689   PetscCall(PetscDTGaussJacobiQuadrature_Internal(npoints, alpha, beta, x, w, PetscDTGaussQuadratureNewton_Internal));
1690   if (a != -1. || b != 1.) { /* shift */
1691     for (i = 0; i < npoints; i++) {
1692       x[i] = (x[i] + 1.) * ((b - a) / 2.) + a;
1693       w[i] *= (b - a) / 2.;
1694     }
1695   }
1696   PetscFunctionReturn(PETSC_SUCCESS);
1697 }
1698 
1699 static PetscErrorCode PetscDTGaussLobattoJacobiQuadrature_Internal(PetscInt npoints, PetscReal alpha, PetscReal beta, PetscReal x[], PetscReal w[], PetscBool newton)
1700 {
1701   PetscInt i;
1702 
1703   PetscFunctionBegin;
1704   PetscCheck(npoints >= 2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of points must be positive");
1705   /* If asking for a 1D Lobatto point, just return the non-Lobatto 1D point */
1706   PetscCheck(alpha > -1., PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "alpha must be > -1.");
1707   PetscCheck(beta > -1., PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "beta must be > -1.");
1708 
1709   x[0]           = -1.;
1710   x[npoints - 1] = 1.;
1711   if (npoints > 2) PetscCall(PetscDTGaussJacobiQuadrature_Internal(npoints - 2, alpha + 1., beta + 1., &x[1], &w[1], newton));
1712   for (i = 1; i < npoints - 1; i++) w[i] /= (1. - x[i] * x[i]);
1713   PetscCall(PetscDTGaussLobattoJacobiEndweights_Internal(npoints, alpha, beta, &w[0], &w[npoints - 1]));
1714   PetscFunctionReturn(PETSC_SUCCESS);
1715 }
1716 
1717 /*@
1718   PetscDTGaussLobattoJacobiQuadrature - quadrature for the interval [a, b] with the weight function
1719   $(x-a)^\alpha (x-b)^\beta$, with endpoints a and b included as quadrature points.
1720 
1721   Not Collective
1722 
1723   Input Parameters:
1724 + npoints - the number of points in the quadrature rule
1725 . a - the left endpoint of the interval
1726 . b - the right endpoint of the interval
1727 . alpha - the left exponent
1728 - beta - the right exponent
1729 
1730   Output Parameters:
1731 + x - array of length `npoints`, the locations of the quadrature points
1732 - w - array of length `npoints`, the weights of the quadrature points
1733 
1734   Level: intermediate
1735 
1736   Note:
1737   This quadrature rule is exact for polynomials up to degree 2*npoints - 3.
1738 
1739 .seealso: `PetscDTGaussJacobiQuadrature()`
1740 @*/
1741 PetscErrorCode PetscDTGaussLobattoJacobiQuadrature(PetscInt npoints, PetscReal a, PetscReal b, PetscReal alpha, PetscReal beta, PetscReal x[], PetscReal w[])
1742 {
1743   PetscInt i;
1744 
1745   PetscFunctionBegin;
1746   PetscCall(PetscDTGaussLobattoJacobiQuadrature_Internal(npoints, alpha, beta, x, w, PetscDTGaussQuadratureNewton_Internal));
1747   if (a != -1. || b != 1.) { /* shift */
1748     for (i = 0; i < npoints; i++) {
1749       x[i] = (x[i] + 1.) * ((b - a) / 2.) + a;
1750       w[i] *= (b - a) / 2.;
1751     }
1752   }
1753   PetscFunctionReturn(PETSC_SUCCESS);
1754 }
1755 
1756 /*@
1757    PetscDTGaussQuadrature - create Gauss-Legendre quadrature
1758 
1759    Not Collective
1760 
1761    Input Parameters:
1762 +  npoints - number of points
1763 .  a - left end of interval (often-1)
1764 -  b - right end of interval (often +1)
1765 
1766    Output Parameters:
1767 +  x - quadrature points
1768 -  w - quadrature weights
1769 
1770    Level: intermediate
1771 
1772    References:
1773 .  * - Golub and Welsch, Calculation of Quadrature Rules, Math. Comp. 23(106), 1969.
1774 
1775 .seealso: `PetscDTLegendreEval()`, `PetscDTGaussJacobiQuadrature()`
1776 @*/
1777 PetscErrorCode PetscDTGaussQuadrature(PetscInt npoints, PetscReal a, PetscReal b, PetscReal *x, PetscReal *w)
1778 {
1779   PetscInt i;
1780 
1781   PetscFunctionBegin;
1782   PetscCall(PetscDTGaussJacobiQuadrature_Internal(npoints, 0., 0., x, w, PetscDTGaussQuadratureNewton_Internal));
1783   if (a != -1. || b != 1.) { /* shift */
1784     for (i = 0; i < npoints; i++) {
1785       x[i] = (x[i] + 1.) * ((b - a) / 2.) + a;
1786       w[i] *= (b - a) / 2.;
1787     }
1788   }
1789   PetscFunctionReturn(PETSC_SUCCESS);
1790 }
1791 
1792 /*@C
1793    PetscDTGaussLobattoLegendreQuadrature - creates a set of the locations and weights of the Gauss-Lobatto-Legendre
1794                       nodes of a given size on the domain [-1,1]
1795 
1796    Not Collective
1797 
1798    Input Parameters:
1799 +  n - number of grid nodes
1800 -  type - `PETSCGAUSSLOBATTOLEGENDRE_VIA_LINEAR_ALGEBRA` or `PETSCGAUSSLOBATTOLEGENDRE_VIA_NEWTON`
1801 
1802    Output Parameters:
1803 +  x - quadrature points
1804 -  w - quadrature weights
1805 
1806    Level: intermediate
1807 
1808    Notes:
1809     For n > 30  the Newton approach computes duplicate (incorrect) values for some nodes because the initial guess is apparently not
1810           close enough to the desired solution
1811 
1812    These are useful for implementing spectral methods based on Gauss-Lobatto-Legendre (GLL) nodes
1813 
1814    See  https://epubs.siam.org/doi/abs/10.1137/110855442  https://epubs.siam.org/doi/abs/10.1137/120889873 for better ways to compute GLL nodes
1815 
1816 .seealso: `PetscDTGaussQuadrature()`, `PetscGaussLobattoLegendreCreateType`
1817 
1818 @*/
1819 PetscErrorCode PetscDTGaussLobattoLegendreQuadrature(PetscInt npoints, PetscGaussLobattoLegendreCreateType type, PetscReal *x, PetscReal *w)
1820 {
1821   PetscBool newton;
1822 
1823   PetscFunctionBegin;
1824   PetscCheck(npoints >= 2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must provide at least 2 grid points per element");
1825   newton = (PetscBool)(type == PETSCGAUSSLOBATTOLEGENDRE_VIA_NEWTON);
1826   PetscCall(PetscDTGaussLobattoJacobiQuadrature_Internal(npoints, 0., 0., x, w, newton));
1827   PetscFunctionReturn(PETSC_SUCCESS);
1828 }
1829 
1830 /*@
1831   PetscDTGaussTensorQuadrature - creates a tensor-product Gauss quadrature
1832 
1833   Not Collective
1834 
1835   Input Parameters:
1836 + dim     - The spatial dimension
1837 . Nc      - The number of components
1838 . npoints - number of points in one dimension
1839 . a       - left end of interval (often-1)
1840 - b       - right end of interval (often +1)
1841 
1842   Output Parameter:
1843 . q - A `PetscQuadrature` object
1844 
1845   Level: intermediate
1846 
1847 .seealso: `PetscDTGaussQuadrature()`, `PetscDTLegendreEval()`
1848 @*/
1849 PetscErrorCode PetscDTGaussTensorQuadrature(PetscInt dim, PetscInt Nc, PetscInt npoints, PetscReal a, PetscReal b, PetscQuadrature *q)
1850 {
1851   PetscInt   totpoints = dim > 1 ? dim > 2 ? npoints * PetscSqr(npoints) : PetscSqr(npoints) : npoints, i, j, k, c;
1852   PetscReal *x, *w, *xw, *ww;
1853 
1854   PetscFunctionBegin;
1855   PetscCall(PetscMalloc1(totpoints * dim, &x));
1856   PetscCall(PetscMalloc1(totpoints * Nc, &w));
1857   /* Set up the Golub-Welsch system */
1858   switch (dim) {
1859   case 0:
1860     PetscCall(PetscFree(x));
1861     PetscCall(PetscFree(w));
1862     PetscCall(PetscMalloc1(1, &x));
1863     PetscCall(PetscMalloc1(Nc, &w));
1864     x[0] = 0.0;
1865     for (c = 0; c < Nc; ++c) w[c] = 1.0;
1866     break;
1867   case 1:
1868     PetscCall(PetscMalloc1(npoints, &ww));
1869     PetscCall(PetscDTGaussQuadrature(npoints, a, b, x, ww));
1870     for (i = 0; i < npoints; ++i)
1871       for (c = 0; c < Nc; ++c) w[i * Nc + c] = ww[i];
1872     PetscCall(PetscFree(ww));
1873     break;
1874   case 2:
1875     PetscCall(PetscMalloc2(npoints, &xw, npoints, &ww));
1876     PetscCall(PetscDTGaussQuadrature(npoints, a, b, xw, ww));
1877     for (i = 0; i < npoints; ++i) {
1878       for (j = 0; j < npoints; ++j) {
1879         x[(i * npoints + j) * dim + 0] = xw[i];
1880         x[(i * npoints + j) * dim + 1] = xw[j];
1881         for (c = 0; c < Nc; ++c) w[(i * npoints + j) * Nc + c] = ww[i] * ww[j];
1882       }
1883     }
1884     PetscCall(PetscFree2(xw, ww));
1885     break;
1886   case 3:
1887     PetscCall(PetscMalloc2(npoints, &xw, npoints, &ww));
1888     PetscCall(PetscDTGaussQuadrature(npoints, a, b, xw, ww));
1889     for (i = 0; i < npoints; ++i) {
1890       for (j = 0; j < npoints; ++j) {
1891         for (k = 0; k < npoints; ++k) {
1892           x[((i * npoints + j) * npoints + k) * dim + 0] = xw[i];
1893           x[((i * npoints + j) * npoints + k) * dim + 1] = xw[j];
1894           x[((i * npoints + j) * npoints + k) * dim + 2] = xw[k];
1895           for (c = 0; c < Nc; ++c) w[((i * npoints + j) * npoints + k) * Nc + c] = ww[i] * ww[j] * ww[k];
1896         }
1897       }
1898     }
1899     PetscCall(PetscFree2(xw, ww));
1900     break;
1901   default:
1902     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot construct quadrature rule for dimension %" PetscInt_FMT, dim);
1903   }
1904   PetscCall(PetscQuadratureCreate(PETSC_COMM_SELF, q));
1905   PetscCall(PetscQuadratureSetOrder(*q, 2 * npoints - 1));
1906   PetscCall(PetscQuadratureSetData(*q, dim, Nc, totpoints, x, w));
1907   PetscCall(PetscObjectChangeTypeName((PetscObject)*q, "GaussTensor"));
1908   PetscFunctionReturn(PETSC_SUCCESS);
1909 }
1910 
1911 /*@
1912   PetscDTStroudConicalQuadrature - create Stroud conical quadrature for a simplex
1913 
1914   Not Collective
1915 
1916   Input Parameters:
1917 + dim     - The simplex dimension
1918 . Nc      - The number of components
1919 . npoints - The number of points in one dimension
1920 . a       - left end of interval (often-1)
1921 - b       - right end of interval (often +1)
1922 
1923   Output Parameter:
1924 . q - A `PetscQuadrature` object
1925 
1926   Level: intermediate
1927 
1928   Note:
1929   For `dim` == 1, this is Gauss-Legendre quadrature
1930 
1931   References:
1932 . * - Karniadakis and Sherwin.  FIAT
1933 
1934 .seealso: `PetscDTGaussTensorQuadrature()`, `PetscDTGaussQuadrature()`
1935 @*/
1936 PetscErrorCode PetscDTStroudConicalQuadrature(PetscInt dim, PetscInt Nc, PetscInt npoints, PetscReal a, PetscReal b, PetscQuadrature *q)
1937 {
1938   PetscInt   totprev, totrem;
1939   PetscInt   totpoints;
1940   PetscReal *p1, *w1;
1941   PetscReal *x, *w;
1942   PetscInt   i, j, k, l, m, pt, c;
1943 
1944   PetscFunctionBegin;
1945   PetscCheck(!(a != -1.0) && !(b != 1.0), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must use default internal right now");
1946   totpoints = 1;
1947   for (i = 0, totpoints = 1; i < dim; i++) totpoints *= npoints;
1948   PetscCall(PetscMalloc1(totpoints * dim, &x));
1949   PetscCall(PetscMalloc1(totpoints * Nc, &w));
1950   PetscCall(PetscMalloc2(npoints, &p1, npoints, &w1));
1951   for (i = 0; i < totpoints * Nc; i++) w[i] = 1.;
1952   for (i = 0, totprev = 1, totrem = totpoints / npoints; i < dim; i++) {
1953     PetscReal mul;
1954 
1955     mul = PetscPowReal(2., -i);
1956     PetscCall(PetscDTGaussJacobiQuadrature(npoints, -1., 1., i, 0.0, p1, w1));
1957     for (pt = 0, l = 0; l < totprev; l++) {
1958       for (j = 0; j < npoints; j++) {
1959         for (m = 0; m < totrem; m++, pt++) {
1960           for (k = 0; k < i; k++) x[pt * dim + k] = (x[pt * dim + k] + 1.) * (1. - p1[j]) * 0.5 - 1.;
1961           x[pt * dim + i] = p1[j];
1962           for (c = 0; c < Nc; c++) w[pt * Nc + c] *= mul * w1[j];
1963         }
1964       }
1965     }
1966     totprev *= npoints;
1967     totrem /= npoints;
1968   }
1969   PetscCall(PetscFree2(p1, w1));
1970   PetscCall(PetscQuadratureCreate(PETSC_COMM_SELF, q));
1971   PetscCall(PetscQuadratureSetOrder(*q, 2 * npoints - 1));
1972   PetscCall(PetscQuadratureSetData(*q, dim, Nc, totpoints, x, w));
1973   PetscCall(PetscObjectChangeTypeName((PetscObject)*q, "StroudConical"));
1974   PetscFunctionReturn(PETSC_SUCCESS);
1975 }
1976 
1977 static PetscBool MinSymTriQuadCite       = PETSC_FALSE;
1978 const char       MinSymTriQuadCitation[] = "@article{WitherdenVincent2015,\n"
1979                                            "  title = {On the identification of symmetric quadrature rules for finite element methods},\n"
1980                                            "  journal = {Computers & Mathematics with Applications},\n"
1981                                            "  volume = {69},\n"
1982                                            "  number = {10},\n"
1983                                            "  pages = {1232-1241},\n"
1984                                            "  year = {2015},\n"
1985                                            "  issn = {0898-1221},\n"
1986                                            "  doi = {10.1016/j.camwa.2015.03.017},\n"
1987                                            "  url = {https://www.sciencedirect.com/science/article/pii/S0898122115001224},\n"
1988                                            "  author = {F.D. Witherden and P.E. Vincent},\n"
1989                                            "}\n";
1990 
1991 #include "petscdttriquadrules.h"
1992 
1993 static PetscBool MinSymTetQuadCite       = PETSC_FALSE;
1994 const char       MinSymTetQuadCitation[] = "@article{JaskowiecSukumar2021\n"
1995                                            "  author = {Jaskowiec, Jan and Sukumar, N.},\n"
1996                                            "  title = {High-order symmetric cubature rules for tetrahedra and pyramids},\n"
1997                                            "  journal = {International Journal for Numerical Methods in Engineering},\n"
1998                                            "  volume = {122},\n"
1999                                            "  number = {1},\n"
2000                                            "  pages = {148-171},\n"
2001                                            "  doi = {10.1002/nme.6528},\n"
2002                                            "  url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/nme.6528},\n"
2003                                            "  eprint = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/nme.6528},\n"
2004                                            "  year = {2021}\n"
2005                                            "}\n";
2006 
2007 #include "petscdttetquadrules.h"
2008 
2009 // https://en.wikipedia.org/wiki/Partition_(number_theory)
2010 static PetscErrorCode PetscDTPartitionNumber(PetscInt n, PetscInt *p)
2011 {
2012   // sequence A000041 in the OEIS
2013   const PetscInt partition[]   = {1, 1, 2, 3, 5, 7, 11, 15, 22, 30, 42, 56, 77, 101, 135, 176, 231, 297, 385, 490, 627, 792, 1002, 1255, 1575, 1958, 2436, 3010, 3718, 4565, 5604};
2014   PetscInt       tabulated_max = PETSC_STATIC_ARRAY_LENGTH(partition) - 1;
2015 
2016   PetscFunctionBegin;
2017   PetscCheck(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Partition number not defined for negative number %" PetscInt_FMT, n);
2018   // not implementing the pentagonal number recurrence, we don't need partition numbers for n that high
2019   PetscCheck(n <= tabulated_max, PETSC_COMM_SELF, PETSC_ERR_SUP, "Partition numbers only tabulated up to %" PetscInt_FMT ", not computed for %" PetscInt_FMT, tabulated_max, n);
2020   *p = partition[n];
2021   PetscFunctionReturn(PETSC_SUCCESS);
2022 }
2023 
2024 /*@
2025   PetscDTSimplexQuadrature - Create a quadrature rule for a simplex that exactly integrates polynomials up to a given degree.
2026 
2027   Not Collective
2028 
2029   Input Parameters:
2030 + dim     - The spatial dimension of the simplex (1 = segment, 2 = triangle, 3 = tetrahedron)
2031 . degree  - The largest polynomial degree that is required to be integrated exactly
2032 - type    - left end of interval (often-1)
2033 
2034   Output Parameter:
2035 . quad    - A `PetscQuadrature` object for integration over the biunit simplex
2036             (defined by the bounds $x_i >= -1$ and $\sum_i x_i <= 2 - d$) that is exact for
2037             polynomials up to the given degree
2038 
2039   Level: intermediate
2040 
2041 .seealso: `PetscDTSimplexQuadratureType`, `PetscDTGaussQuadrature()`, `PetscDTStroudCononicalQuadrature()`, `PetscQuadrature`
2042 @*/
2043 PetscErrorCode PetscDTSimplexQuadrature(PetscInt dim, PetscInt degree, PetscDTSimplexQuadratureType type, PetscQuadrature *quad)
2044 {
2045   PetscDTSimplexQuadratureType orig_type = type;
2046 
2047   PetscFunctionBegin;
2048   PetscCheck(dim >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative dimension %" PetscInt_FMT, dim);
2049   PetscCheck(degree >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative degree %" PetscInt_FMT, degree);
2050   if (type == PETSCDTSIMPLEXQUAD_DEFAULT) type = PETSCDTSIMPLEXQUAD_MINSYM;
2051   if (type == PETSCDTSIMPLEXQUAD_CONIC || dim < 2) {
2052     PetscInt points_per_dim = (degree + 2) / 2; // ceil((degree + 1) / 2);
2053     PetscCall(PetscDTStroudConicalQuadrature(dim, 1, points_per_dim, -1, 1, quad));
2054   } else {
2055     PetscInt          n    = dim + 1;
2056     PetscInt          fact = 1;
2057     PetscInt         *part, *perm;
2058     PetscInt          p = 0;
2059     PetscInt          max_degree;
2060     const PetscInt   *nodes_per_type     = NULL;
2061     const PetscInt   *all_num_full_nodes = NULL;
2062     const PetscReal **weights_list       = NULL;
2063     const PetscReal **compact_nodes_list = NULL;
2064     const char       *citation           = NULL;
2065     PetscBool        *cited              = NULL;
2066 
2067     switch (dim) {
2068     case 2:
2069       cited              = &MinSymTriQuadCite;
2070       citation           = MinSymTriQuadCitation;
2071       max_degree         = PetscDTWVTriQuad_max_degree;
2072       nodes_per_type     = PetscDTWVTriQuad_num_orbits;
2073       all_num_full_nodes = PetscDTWVTriQuad_num_nodes;
2074       weights_list       = PetscDTWVTriQuad_weights;
2075       compact_nodes_list = PetscDTWVTriQuad_orbits;
2076       break;
2077     case 3:
2078       cited              = &MinSymTetQuadCite;
2079       citation           = MinSymTetQuadCitation;
2080       max_degree         = PetscDTJSTetQuad_max_degree;
2081       nodes_per_type     = PetscDTJSTetQuad_num_orbits;
2082       all_num_full_nodes = PetscDTJSTetQuad_num_nodes;
2083       weights_list       = PetscDTJSTetQuad_weights;
2084       compact_nodes_list = PetscDTJSTetQuad_orbits;
2085       break;
2086     default:
2087       max_degree = -1;
2088       break;
2089     }
2090 
2091     if (degree > max_degree) {
2092       if (orig_type == PETSCDTSIMPLEXQUAD_DEFAULT) {
2093         // fall back to conic
2094         PetscCall(PetscDTSimplexQuadrature(dim, degree, PETSCDTSIMPLEXQUAD_CONIC, quad));
2095         PetscFunctionReturn(PETSC_SUCCESS);
2096       } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Minimal symmetric quadrature for dim %" PetscInt_FMT ", degree %" PetscInt_FMT " unsupported", dim, degree);
2097     }
2098 
2099     PetscCall(PetscCitationsRegister(citation, cited));
2100 
2101     PetscCall(PetscDTPartitionNumber(n, &p));
2102     for (PetscInt d = 2; d <= n; d++) fact *= d;
2103 
2104     PetscInt         num_full_nodes      = all_num_full_nodes[degree];
2105     const PetscReal *all_compact_nodes   = compact_nodes_list[degree];
2106     const PetscReal *all_compact_weights = weights_list[degree];
2107     nodes_per_type                       = &nodes_per_type[p * degree];
2108 
2109     PetscReal      *points;
2110     PetscReal      *counts;
2111     PetscReal      *weights;
2112     PetscReal      *bary_to_biunit; // row-major transformation of barycentric coordinate to biunit
2113     PetscQuadrature q;
2114 
2115     // compute the transformation
2116     PetscCall(PetscMalloc1(n * dim, &bary_to_biunit));
2117     for (PetscInt d = 0; d < dim; d++) {
2118       for (PetscInt b = 0; b < n; b++) bary_to_biunit[d * n + b] = (d == b) ? 1.0 : -1.0;
2119     }
2120 
2121     PetscCall(PetscMalloc3(n, &part, n, &perm, n, &counts));
2122     PetscCall(PetscCalloc1(num_full_nodes * dim, &points));
2123     PetscCall(PetscMalloc1(num_full_nodes, &weights));
2124 
2125     // (0, 0, ...) is the first partition lexicographically
2126     PetscCall(PetscArrayzero(part, n));
2127     PetscCall(PetscArrayzero(counts, n));
2128     counts[0] = n;
2129 
2130     // for each partition
2131     for (PetscInt s = 0, node_offset = 0; s < p; s++) {
2132       PetscInt num_compact_coords = part[n - 1] + 1;
2133 
2134       const PetscReal *compact_nodes   = all_compact_nodes;
2135       const PetscReal *compact_weights = all_compact_weights;
2136       all_compact_nodes += num_compact_coords * nodes_per_type[s];
2137       all_compact_weights += nodes_per_type[s];
2138 
2139       // for every permutation of the vertices
2140       for (PetscInt f = 0; f < fact; f++) {
2141         PetscCall(PetscDTEnumPerm(n, f, perm, NULL));
2142 
2143         // check if it is a valid permutation
2144         PetscInt digit;
2145         for (digit = 1; digit < n; digit++) {
2146           // skip permutations that would duplicate a node because it has a smaller symmetry group
2147           if (part[digit - 1] == part[digit] && perm[digit - 1] > perm[digit]) break;
2148         }
2149         if (digit < n) continue;
2150 
2151         // create full nodes from this permutation of the compact nodes
2152         PetscReal *full_nodes   = &points[node_offset * dim];
2153         PetscReal *full_weights = &weights[node_offset];
2154 
2155         PetscCall(PetscArraycpy(full_weights, compact_weights, nodes_per_type[s]));
2156         for (PetscInt b = 0; b < n; b++) {
2157           for (PetscInt d = 0; d < dim; d++) {
2158             for (PetscInt node = 0; node < nodes_per_type[s]; node++) full_nodes[node * dim + d] += bary_to_biunit[d * n + perm[b]] * compact_nodes[node * num_compact_coords + part[b]];
2159           }
2160         }
2161         node_offset += nodes_per_type[s];
2162       }
2163 
2164       if (s < p - 1) { // Generate the next partition
2165         /* A partition is described by the number of coordinates that are in
2166          * each set of duplicates (counts) and redundantly by mapping each
2167          * index to its set of duplicates (part)
2168          *
2169          * Counts should always be in nonincreasing order
2170          *
2171          * We want to generate the partitions lexically by part, which means
2172          * finding the last index where count > 1 and reducing by 1.
2173          *
2174          * For the new counts beyond that index, we eagerly assign the remaining
2175          * capacity of the partition to smaller indices (ensures lexical ordering),
2176          * while respecting the nonincreasing invariant of the counts
2177          */
2178         PetscInt last_digit            = part[n - 1];
2179         PetscInt last_digit_with_extra = last_digit;
2180         while (counts[last_digit_with_extra] == 1) last_digit_with_extra--;
2181         PetscInt limit               = --counts[last_digit_with_extra];
2182         PetscInt total_to_distribute = last_digit - last_digit_with_extra + 1;
2183         for (PetscInt digit = last_digit_with_extra + 1; digit < n; digit++) {
2184           counts[digit] = PetscMin(limit, total_to_distribute);
2185           total_to_distribute -= PetscMin(limit, total_to_distribute);
2186         }
2187         for (PetscInt digit = 0, offset = 0; digit < n; digit++) {
2188           PetscInt count = counts[digit];
2189           for (PetscInt c = 0; c < count; c++) part[offset++] = digit;
2190         }
2191       }
2192     }
2193     PetscCall(PetscFree3(part, perm, counts));
2194     PetscCall(PetscFree(bary_to_biunit));
2195     PetscCall(PetscQuadratureCreate(PETSC_COMM_SELF, &q));
2196     PetscCall(PetscQuadratureSetOrder(q, degree));
2197     PetscCall(PetscQuadratureSetData(q, dim, 1, num_full_nodes, points, weights));
2198     *quad = q;
2199   }
2200   PetscFunctionReturn(PETSC_SUCCESS);
2201 }
2202 
2203 /*@
2204   PetscDTTanhSinhTensorQuadrature - create tanh-sinh quadrature for a tensor product cell
2205 
2206   Not Collective
2207 
2208   Input Parameters:
2209 + dim   - The cell dimension
2210 . level - The number of points in one dimension, 2^l
2211 . a     - left end of interval (often-1)
2212 - b     - right end of interval (often +1)
2213 
2214   Output Parameter:
2215 . q - A `PetscQuadrature` object
2216 
2217   Level: intermediate
2218 
2219 .seealso: `PetscDTGaussTensorQuadrature()`, `PetscQuadrature`
2220 @*/
2221 PetscErrorCode PetscDTTanhSinhTensorQuadrature(PetscInt dim, PetscInt level, PetscReal a, PetscReal b, PetscQuadrature *q)
2222 {
2223   const PetscInt  p     = 16;                        /* Digits of precision in the evaluation */
2224   const PetscReal alpha = (b - a) / 2.;              /* Half-width of the integration interval */
2225   const PetscReal beta  = (b + a) / 2.;              /* Center of the integration interval */
2226   const PetscReal h     = PetscPowReal(2.0, -level); /* Step size, length between x_k */
2227   PetscReal       xk;                                /* Quadrature point x_k on reference domain [-1, 1] */
2228   PetscReal       wk = 0.5 * PETSC_PI;               /* Quadrature weight at x_k */
2229   PetscReal      *x, *w;
2230   PetscInt        K, k, npoints;
2231 
2232   PetscFunctionBegin;
2233   PetscCheck(dim <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Dimension %" PetscInt_FMT " not yet implemented", dim);
2234   PetscCheck(level, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must give a number of significant digits");
2235   /* Find K such that the weights are < 32 digits of precision */
2236   for (K = 1; PetscAbsReal(PetscLog10Real(wk)) < 2 * p; ++K) wk = 0.5 * h * PETSC_PI * PetscCoshReal(K * h) / PetscSqr(PetscCoshReal(0.5 * PETSC_PI * PetscSinhReal(K * h)));
2237   PetscCall(PetscQuadratureCreate(PETSC_COMM_SELF, q));
2238   PetscCall(PetscQuadratureSetOrder(*q, 2 * K + 1));
2239   npoints = 2 * K - 1;
2240   PetscCall(PetscMalloc1(npoints * dim, &x));
2241   PetscCall(PetscMalloc1(npoints, &w));
2242   /* Center term */
2243   x[0] = beta;
2244   w[0] = 0.5 * alpha * PETSC_PI;
2245   for (k = 1; k < K; ++k) {
2246     wk           = 0.5 * alpha * h * PETSC_PI * PetscCoshReal(k * h) / PetscSqr(PetscCoshReal(0.5 * PETSC_PI * PetscSinhReal(k * h)));
2247     xk           = PetscTanhReal(0.5 * PETSC_PI * PetscSinhReal(k * h));
2248     x[2 * k - 1] = -alpha * xk + beta;
2249     w[2 * k - 1] = wk;
2250     x[2 * k + 0] = alpha * xk + beta;
2251     w[2 * k + 0] = wk;
2252   }
2253   PetscCall(PetscQuadratureSetData(*q, dim, 1, npoints, x, w));
2254   PetscFunctionReturn(PETSC_SUCCESS);
2255 }
2256 
2257 PetscErrorCode PetscDTTanhSinhIntegrate(void (*func)(const PetscReal[], void *, PetscReal *), PetscReal a, PetscReal b, PetscInt digits, void *ctx, PetscReal *sol)
2258 {
2259   const PetscInt  p     = 16;           /* Digits of precision in the evaluation */
2260   const PetscReal alpha = (b - a) / 2.; /* Half-width of the integration interval */
2261   const PetscReal beta  = (b + a) / 2.; /* Center of the integration interval */
2262   PetscReal       h     = 1.0;          /* Step size, length between x_k */
2263   PetscInt        l     = 0;            /* Level of refinement, h = 2^{-l} */
2264   PetscReal       osum  = 0.0;          /* Integral on last level */
2265   PetscReal       psum  = 0.0;          /* Integral on the level before the last level */
2266   PetscReal       sum;                  /* Integral on current level */
2267   PetscReal       yk;                   /* Quadrature point 1 - x_k on reference domain [-1, 1] */
2268   PetscReal       lx, rx;               /* Quadrature points to the left and right of 0 on the real domain [a, b] */
2269   PetscReal       wk;                   /* Quadrature weight at x_k */
2270   PetscReal       lval, rval;           /* Terms in the quadature sum to the left and right of 0 */
2271   PetscInt        d;                    /* Digits of precision in the integral */
2272 
2273   PetscFunctionBegin;
2274   PetscCheck(digits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must give a positive number of significant digits");
2275   /* Center term */
2276   func(&beta, ctx, &lval);
2277   sum = 0.5 * alpha * PETSC_PI * lval;
2278   /* */
2279   do {
2280     PetscReal lterm, rterm, maxTerm = 0.0, d1, d2, d3, d4;
2281     PetscInt  k = 1;
2282 
2283     ++l;
2284     /* PetscPrintf(PETSC_COMM_SELF, "LEVEL %" PetscInt_FMT " sum: %15.15f\n", l, sum); */
2285     /* At each level of refinement, h --> h/2 and sum --> sum/2 */
2286     psum = osum;
2287     osum = sum;
2288     h *= 0.5;
2289     sum *= 0.5;
2290     do {
2291       wk = 0.5 * h * PETSC_PI * PetscCoshReal(k * h) / PetscSqr(PetscCoshReal(0.5 * PETSC_PI * PetscSinhReal(k * h)));
2292       yk = 1.0 / (PetscExpReal(0.5 * PETSC_PI * PetscSinhReal(k * h)) * PetscCoshReal(0.5 * PETSC_PI * PetscSinhReal(k * h)));
2293       lx = -alpha * (1.0 - yk) + beta;
2294       rx = alpha * (1.0 - yk) + beta;
2295       func(&lx, ctx, &lval);
2296       func(&rx, ctx, &rval);
2297       lterm   = alpha * wk * lval;
2298       maxTerm = PetscMax(PetscAbsReal(lterm), maxTerm);
2299       sum += lterm;
2300       rterm   = alpha * wk * rval;
2301       maxTerm = PetscMax(PetscAbsReal(rterm), maxTerm);
2302       sum += rterm;
2303       ++k;
2304       /* Only need to evaluate every other point on refined levels */
2305       if (l != 1) ++k;
2306     } while (PetscAbsReal(PetscLog10Real(wk)) < p); /* Only need to evaluate sum until weights are < 32 digits of precision */
2307 
2308     d1 = PetscLog10Real(PetscAbsReal(sum - osum));
2309     d2 = PetscLog10Real(PetscAbsReal(sum - psum));
2310     d3 = PetscLog10Real(maxTerm) - p;
2311     if (PetscMax(PetscAbsReal(lterm), PetscAbsReal(rterm)) == 0.0) d4 = 0.0;
2312     else d4 = PetscLog10Real(PetscMax(PetscAbsReal(lterm), PetscAbsReal(rterm)));
2313     d = PetscAbsInt(PetscMin(0, PetscMax(PetscMax(PetscMax(PetscSqr(d1) / d2, 2 * d1), d3), d4)));
2314   } while (d < digits && l < 12);
2315   *sol = sum;
2316 
2317   PetscFunctionReturn(PETSC_SUCCESS);
2318 }
2319 
2320 #if defined(PETSC_HAVE_MPFR)
2321 PetscErrorCode PetscDTTanhSinhIntegrateMPFR(void (*func)(const PetscReal[], void *, PetscReal *), PetscReal a, PetscReal b, PetscInt digits, void *ctx, PetscReal *sol)
2322 {
2323   const PetscInt safetyFactor = 2; /* Calculate abcissa until 2*p digits */
2324   PetscInt       l            = 0; /* Level of refinement, h = 2^{-l} */
2325   mpfr_t         alpha;            /* Half-width of the integration interval */
2326   mpfr_t         beta;             /* Center of the integration interval */
2327   mpfr_t         h;                /* Step size, length between x_k */
2328   mpfr_t         osum;             /* Integral on last level */
2329   mpfr_t         psum;             /* Integral on the level before the last level */
2330   mpfr_t         sum;              /* Integral on current level */
2331   mpfr_t         yk;               /* Quadrature point 1 - x_k on reference domain [-1, 1] */
2332   mpfr_t         lx, rx;           /* Quadrature points to the left and right of 0 on the real domain [a, b] */
2333   mpfr_t         wk;               /* Quadrature weight at x_k */
2334   PetscReal      lval, rval, rtmp; /* Terms in the quadature sum to the left and right of 0 */
2335   PetscInt       d;                /* Digits of precision in the integral */
2336   mpfr_t         pi2, kh, msinh, mcosh, maxTerm, curTerm, tmp;
2337 
2338   PetscFunctionBegin;
2339   PetscCheck(digits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must give a positive number of significant digits");
2340   /* Create high precision storage */
2341   mpfr_inits2(PetscCeilReal(safetyFactor * digits * PetscLogReal(10.) / PetscLogReal(2.)), alpha, beta, h, sum, osum, psum, yk, wk, lx, rx, tmp, maxTerm, curTerm, pi2, kh, msinh, mcosh, NULL);
2342   /* Initialization */
2343   mpfr_set_d(alpha, 0.5 * (b - a), MPFR_RNDN);
2344   mpfr_set_d(beta, 0.5 * (b + a), MPFR_RNDN);
2345   mpfr_set_d(osum, 0.0, MPFR_RNDN);
2346   mpfr_set_d(psum, 0.0, MPFR_RNDN);
2347   mpfr_set_d(h, 1.0, MPFR_RNDN);
2348   mpfr_const_pi(pi2, MPFR_RNDN);
2349   mpfr_mul_d(pi2, pi2, 0.5, MPFR_RNDN);
2350   /* Center term */
2351   rtmp = 0.5 * (b + a);
2352   func(&rtmp, ctx, &lval);
2353   mpfr_set(sum, pi2, MPFR_RNDN);
2354   mpfr_mul(sum, sum, alpha, MPFR_RNDN);
2355   mpfr_mul_d(sum, sum, lval, MPFR_RNDN);
2356   /* */
2357   do {
2358     PetscReal d1, d2, d3, d4;
2359     PetscInt  k = 1;
2360 
2361     ++l;
2362     mpfr_set_d(maxTerm, 0.0, MPFR_RNDN);
2363     /* PetscPrintf(PETSC_COMM_SELF, "LEVEL %" PetscInt_FMT " sum: %15.15f\n", l, sum); */
2364     /* At each level of refinement, h --> h/2 and sum --> sum/2 */
2365     mpfr_set(psum, osum, MPFR_RNDN);
2366     mpfr_set(osum, sum, MPFR_RNDN);
2367     mpfr_mul_d(h, h, 0.5, MPFR_RNDN);
2368     mpfr_mul_d(sum, sum, 0.5, MPFR_RNDN);
2369     do {
2370       mpfr_set_si(kh, k, MPFR_RNDN);
2371       mpfr_mul(kh, kh, h, MPFR_RNDN);
2372       /* Weight */
2373       mpfr_set(wk, h, MPFR_RNDN);
2374       mpfr_sinh_cosh(msinh, mcosh, kh, MPFR_RNDN);
2375       mpfr_mul(msinh, msinh, pi2, MPFR_RNDN);
2376       mpfr_mul(mcosh, mcosh, pi2, MPFR_RNDN);
2377       mpfr_cosh(tmp, msinh, MPFR_RNDN);
2378       mpfr_sqr(tmp, tmp, MPFR_RNDN);
2379       mpfr_mul(wk, wk, mcosh, MPFR_RNDN);
2380       mpfr_div(wk, wk, tmp, MPFR_RNDN);
2381       /* Abscissa */
2382       mpfr_set_d(yk, 1.0, MPFR_RNDZ);
2383       mpfr_cosh(tmp, msinh, MPFR_RNDN);
2384       mpfr_div(yk, yk, tmp, MPFR_RNDZ);
2385       mpfr_exp(tmp, msinh, MPFR_RNDN);
2386       mpfr_div(yk, yk, tmp, MPFR_RNDZ);
2387       /* Quadrature points */
2388       mpfr_sub_d(lx, yk, 1.0, MPFR_RNDZ);
2389       mpfr_mul(lx, lx, alpha, MPFR_RNDU);
2390       mpfr_add(lx, lx, beta, MPFR_RNDU);
2391       mpfr_d_sub(rx, 1.0, yk, MPFR_RNDZ);
2392       mpfr_mul(rx, rx, alpha, MPFR_RNDD);
2393       mpfr_add(rx, rx, beta, MPFR_RNDD);
2394       /* Evaluation */
2395       rtmp = mpfr_get_d(lx, MPFR_RNDU);
2396       func(&rtmp, ctx, &lval);
2397       rtmp = mpfr_get_d(rx, MPFR_RNDD);
2398       func(&rtmp, ctx, &rval);
2399       /* Update */
2400       mpfr_mul(tmp, wk, alpha, MPFR_RNDN);
2401       mpfr_mul_d(tmp, tmp, lval, MPFR_RNDN);
2402       mpfr_add(sum, sum, tmp, MPFR_RNDN);
2403       mpfr_abs(tmp, tmp, MPFR_RNDN);
2404       mpfr_max(maxTerm, maxTerm, tmp, MPFR_RNDN);
2405       mpfr_set(curTerm, tmp, MPFR_RNDN);
2406       mpfr_mul(tmp, wk, alpha, MPFR_RNDN);
2407       mpfr_mul_d(tmp, tmp, rval, MPFR_RNDN);
2408       mpfr_add(sum, sum, tmp, MPFR_RNDN);
2409       mpfr_abs(tmp, tmp, MPFR_RNDN);
2410       mpfr_max(maxTerm, maxTerm, tmp, MPFR_RNDN);
2411       mpfr_max(curTerm, curTerm, tmp, MPFR_RNDN);
2412       ++k;
2413       /* Only need to evaluate every other point on refined levels */
2414       if (l != 1) ++k;
2415       mpfr_log10(tmp, wk, MPFR_RNDN);
2416       mpfr_abs(tmp, tmp, MPFR_RNDN);
2417     } while (mpfr_get_d(tmp, MPFR_RNDN) < safetyFactor * digits); /* Only need to evaluate sum until weights are < 32 digits of precision */
2418     mpfr_sub(tmp, sum, osum, MPFR_RNDN);
2419     mpfr_abs(tmp, tmp, MPFR_RNDN);
2420     mpfr_log10(tmp, tmp, MPFR_RNDN);
2421     d1 = mpfr_get_d(tmp, MPFR_RNDN);
2422     mpfr_sub(tmp, sum, psum, MPFR_RNDN);
2423     mpfr_abs(tmp, tmp, MPFR_RNDN);
2424     mpfr_log10(tmp, tmp, MPFR_RNDN);
2425     d2 = mpfr_get_d(tmp, MPFR_RNDN);
2426     mpfr_log10(tmp, maxTerm, MPFR_RNDN);
2427     d3 = mpfr_get_d(tmp, MPFR_RNDN) - digits;
2428     mpfr_log10(tmp, curTerm, MPFR_RNDN);
2429     d4 = mpfr_get_d(tmp, MPFR_RNDN);
2430     d  = PetscAbsInt(PetscMin(0, PetscMax(PetscMax(PetscMax(PetscSqr(d1) / d2, 2 * d1), d3), d4)));
2431   } while (d < digits && l < 8);
2432   *sol = mpfr_get_d(sum, MPFR_RNDN);
2433   /* Cleanup */
2434   mpfr_clears(alpha, beta, h, sum, osum, psum, yk, wk, lx, rx, tmp, maxTerm, curTerm, pi2, kh, msinh, mcosh, NULL);
2435   PetscFunctionReturn(PETSC_SUCCESS);
2436 }
2437 #else
2438 
2439 PetscErrorCode PetscDTTanhSinhIntegrateMPFR(void (*func)(const PetscReal[], void *, PetscReal *), PetscReal a, PetscReal b, PetscInt digits, void *ctx, PetscReal *sol)
2440 {
2441   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "This method will not work without MPFR. Reconfigure using --download-mpfr --download-gmp");
2442 }
2443 #endif
2444 
2445 /*@
2446   PetscDTTensorQuadratureCreate - create the tensor product quadrature from two lower-dimensional quadratures
2447 
2448   Not Collective
2449 
2450   Input Parameters:
2451 + q1 - The first quadrature
2452 - q2 - The second quadrature
2453 
2454   Output Parameter:
2455 . q - A `PetscQuadrature` object
2456 
2457   Level: intermediate
2458 
2459 .seealso: `PetscQuadrature`, `PetscDTGaussTensorQuadrature()`
2460 @*/
2461 PetscErrorCode PetscDTTensorQuadratureCreate(PetscQuadrature q1, PetscQuadrature q2, PetscQuadrature *q)
2462 {
2463   const PetscReal *x1, *w1, *x2, *w2;
2464   PetscReal       *x, *w;
2465   PetscInt         dim1, Nc1, Np1, order1, qa, d1;
2466   PetscInt         dim2, Nc2, Np2, order2, qb, d2;
2467   PetscInt         dim, Nc, Np, order, qc, d;
2468 
2469   PetscFunctionBegin;
2470   PetscValidHeaderSpecific(q1, PETSCQUADRATURE_CLASSID, 1);
2471   PetscValidHeaderSpecific(q2, PETSCQUADRATURE_CLASSID, 2);
2472   PetscValidPointer(q, 3);
2473   PetscCall(PetscQuadratureGetOrder(q1, &order1));
2474   PetscCall(PetscQuadratureGetOrder(q2, &order2));
2475   PetscCheck(order1 == order2, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Order1 %" PetscInt_FMT " != %" PetscInt_FMT " Order2", order1, order2);
2476   PetscCall(PetscQuadratureGetData(q1, &dim1, &Nc1, &Np1, &x1, &w1));
2477   PetscCall(PetscQuadratureGetData(q2, &dim2, &Nc2, &Np2, &x2, &w2));
2478   PetscCheck(Nc1 == Nc2, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "NumComp1 %" PetscInt_FMT " != %" PetscInt_FMT " NumComp2", Nc1, Nc2);
2479 
2480   dim   = dim1 + dim2;
2481   Nc    = Nc1;
2482   Np    = Np1 * Np2;
2483   order = order1;
2484   PetscCall(PetscQuadratureCreate(PETSC_COMM_SELF, q));
2485   PetscCall(PetscQuadratureSetOrder(*q, order));
2486   PetscCall(PetscMalloc1(Np * dim, &x));
2487   PetscCall(PetscMalloc1(Np, &w));
2488   for (qa = 0, qc = 0; qa < Np1; ++qa) {
2489     for (qb = 0; qb < Np2; ++qb, ++qc) {
2490       for (d1 = 0, d = 0; d1 < dim1; ++d1, ++d) x[qc * dim + d] = x1[qa * dim1 + d1];
2491       for (d2 = 0; d2 < dim2; ++d2, ++d) x[qc * dim + d] = x2[qb * dim2 + d2];
2492       w[qc] = w1[qa] * w2[qb];
2493     }
2494   }
2495   PetscCall(PetscQuadratureSetData(*q, dim, Nc, Np, x, w));
2496   PetscFunctionReturn(PETSC_SUCCESS);
2497 }
2498 
2499 /* Overwrites A. Can only handle full-rank problems with m>=n
2500    A in column-major format
2501    Ainv in row-major format
2502    tau has length m
2503    worksize must be >= max(1,n)
2504  */
2505 static PetscErrorCode PetscDTPseudoInverseQR(PetscInt m, PetscInt mstride, PetscInt n, PetscReal *A_in, PetscReal *Ainv_out, PetscScalar *tau, PetscInt worksize, PetscScalar *work)
2506 {
2507   PetscBLASInt M, N, K, lda, ldb, ldwork, info;
2508   PetscScalar *A, *Ainv, *R, *Q, Alpha;
2509 
2510   PetscFunctionBegin;
2511 #if defined(PETSC_USE_COMPLEX)
2512   {
2513     PetscInt i, j;
2514     PetscCall(PetscMalloc2(m * n, &A, m * n, &Ainv));
2515     for (j = 0; j < n; j++) {
2516       for (i = 0; i < m; i++) A[i + m * j] = A_in[i + mstride * j];
2517     }
2518     mstride = m;
2519   }
2520 #else
2521   A = A_in;
2522   Ainv = Ainv_out;
2523 #endif
2524 
2525   PetscCall(PetscBLASIntCast(m, &M));
2526   PetscCall(PetscBLASIntCast(n, &N));
2527   PetscCall(PetscBLASIntCast(mstride, &lda));
2528   PetscCall(PetscBLASIntCast(worksize, &ldwork));
2529   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
2530   PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&M, &N, A, &lda, tau, work, &ldwork, &info));
2531   PetscCall(PetscFPTrapPop());
2532   PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "xGEQRF error");
2533   R = A; /* Upper triangular part of A now contains R, the rest contains the elementary reflectors */
2534 
2535   /* Extract an explicit representation of Q */
2536   Q = Ainv;
2537   PetscCall(PetscArraycpy(Q, A, mstride * n));
2538   K = N; /* full rank */
2539   PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&M, &N, &K, Q, &lda, tau, work, &ldwork, &info));
2540   PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "xORGQR/xUNGQR error");
2541 
2542   /* Compute A^{-T} = (R^{-1} Q^T)^T = Q R^{-T} */
2543   Alpha = 1.0;
2544   ldb   = lda;
2545   PetscCallBLAS("BLAStrsm", BLAStrsm_("Right", "Upper", "ConjugateTranspose", "NotUnitTriangular", &M, &N, &Alpha, R, &lda, Q, &ldb));
2546   /* Ainv is Q, overwritten with inverse */
2547 
2548 #if defined(PETSC_USE_COMPLEX)
2549   {
2550     PetscInt i;
2551     for (i = 0; i < m * n; i++) Ainv_out[i] = PetscRealPart(Ainv[i]);
2552     PetscCall(PetscFree2(A, Ainv));
2553   }
2554 #endif
2555   PetscFunctionReturn(PETSC_SUCCESS);
2556 }
2557 
2558 /* Computes integral of L_p' over intervals {(x0,x1),(x1,x2),...} */
2559 static PetscErrorCode PetscDTLegendreIntegrate(PetscInt ninterval, const PetscReal *x, PetscInt ndegree, const PetscInt *degrees, PetscBool Transpose, PetscReal *B)
2560 {
2561   PetscReal *Bv;
2562   PetscInt   i, j;
2563 
2564   PetscFunctionBegin;
2565   PetscCall(PetscMalloc1((ninterval + 1) * ndegree, &Bv));
2566   /* Point evaluation of L_p on all the source vertices */
2567   PetscCall(PetscDTLegendreEval(ninterval + 1, x, ndegree, degrees, Bv, NULL, NULL));
2568   /* Integral over each interval: \int_a^b L_p' = L_p(b)-L_p(a) */
2569   for (i = 0; i < ninterval; i++) {
2570     for (j = 0; j < ndegree; j++) {
2571       if (Transpose) B[i + ninterval * j] = Bv[(i + 1) * ndegree + j] - Bv[i * ndegree + j];
2572       else B[i * ndegree + j] = Bv[(i + 1) * ndegree + j] - Bv[i * ndegree + j];
2573     }
2574   }
2575   PetscCall(PetscFree(Bv));
2576   PetscFunctionReturn(PETSC_SUCCESS);
2577 }
2578 
2579 /*@
2580    PetscDTReconstructPoly - create matrix representing polynomial reconstruction using cell intervals and evaluation at target intervals
2581 
2582    Not Collective
2583 
2584    Input Parameters:
2585 +  degree - degree of reconstruction polynomial
2586 .  nsource - number of source intervals
2587 .  sourcex - sorted coordinates of source cell boundaries (length nsource+1)
2588 .  ntarget - number of target intervals
2589 -  targetx - sorted coordinates of target cell boundaries (length ntarget+1)
2590 
2591    Output Parameter:
2592 .  R - reconstruction matrix, utarget = sum_s R[t*nsource+s] * usource[s]
2593 
2594    Level: advanced
2595 
2596 .seealso: `PetscDTLegendreEval()`
2597 @*/
2598 PetscErrorCode PetscDTReconstructPoly(PetscInt degree, PetscInt nsource, const PetscReal *sourcex, PetscInt ntarget, const PetscReal *targetx, PetscReal *R)
2599 {
2600   PetscInt     i, j, k, *bdegrees, worksize;
2601   PetscReal    xmin, xmax, center, hscale, *sourcey, *targety, *Bsource, *Bsinv, *Btarget;
2602   PetscScalar *tau, *work;
2603 
2604   PetscFunctionBegin;
2605   PetscValidRealPointer(sourcex, 3);
2606   PetscValidRealPointer(targetx, 5);
2607   PetscValidRealPointer(R, 6);
2608   PetscCheck(degree < nsource, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Reconstruction degree %" PetscInt_FMT " must be less than number of source intervals %" PetscInt_FMT, degree, nsource);
2609   if (PetscDefined(USE_DEBUG)) {
2610     for (i = 0; i < nsource; i++) PetscCheck(sourcex[i] < sourcex[i + 1], PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Source interval %" PetscInt_FMT " has negative orientation (%g,%g)", i, (double)sourcex[i], (double)sourcex[i + 1]);
2611     for (i = 0; i < ntarget; i++) PetscCheck(targetx[i] < targetx[i + 1], PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Target interval %" PetscInt_FMT " has negative orientation (%g,%g)", i, (double)targetx[i], (double)targetx[i + 1]);
2612   }
2613   xmin     = PetscMin(sourcex[0], targetx[0]);
2614   xmax     = PetscMax(sourcex[nsource], targetx[ntarget]);
2615   center   = (xmin + xmax) / 2;
2616   hscale   = (xmax - xmin) / 2;
2617   worksize = nsource;
2618   PetscCall(PetscMalloc4(degree + 1, &bdegrees, nsource + 1, &sourcey, nsource * (degree + 1), &Bsource, worksize, &work));
2619   PetscCall(PetscMalloc4(nsource, &tau, nsource * (degree + 1), &Bsinv, ntarget + 1, &targety, ntarget * (degree + 1), &Btarget));
2620   for (i = 0; i <= nsource; i++) sourcey[i] = (sourcex[i] - center) / hscale;
2621   for (i = 0; i <= degree; i++) bdegrees[i] = i + 1;
2622   PetscCall(PetscDTLegendreIntegrate(nsource, sourcey, degree + 1, bdegrees, PETSC_TRUE, Bsource));
2623   PetscCall(PetscDTPseudoInverseQR(nsource, nsource, degree + 1, Bsource, Bsinv, tau, nsource, work));
2624   for (i = 0; i <= ntarget; i++) targety[i] = (targetx[i] - center) / hscale;
2625   PetscCall(PetscDTLegendreIntegrate(ntarget, targety, degree + 1, bdegrees, PETSC_FALSE, Btarget));
2626   for (i = 0; i < ntarget; i++) {
2627     PetscReal rowsum = 0;
2628     for (j = 0; j < nsource; j++) {
2629       PetscReal sum = 0;
2630       for (k = 0; k < degree + 1; k++) sum += Btarget[i * (degree + 1) + k] * Bsinv[k * nsource + j];
2631       R[i * nsource + j] = sum;
2632       rowsum += sum;
2633     }
2634     for (j = 0; j < nsource; j++) R[i * nsource + j] /= rowsum; /* normalize each row */
2635   }
2636   PetscCall(PetscFree4(bdegrees, sourcey, Bsource, work));
2637   PetscCall(PetscFree4(tau, Bsinv, targety, Btarget));
2638   PetscFunctionReturn(PETSC_SUCCESS);
2639 }
2640 
2641 /*@C
2642    PetscGaussLobattoLegendreIntegrate - Compute the L2 integral of a function on the GLL points
2643 
2644    Not Collective
2645 
2646    Input Parameters:
2647 +  n - the number of GLL nodes
2648 .  nodes - the GLL nodes
2649 .  weights - the GLL weights
2650 -  f - the function values at the nodes
2651 
2652    Output Parameter:
2653 .  in - the value of the integral
2654 
2655    Level: beginner
2656 
2657 .seealso: `PetscDTGaussLobattoLegendreQuadrature()`
2658 @*/
2659 PetscErrorCode PetscGaussLobattoLegendreIntegrate(PetscInt n, PetscReal *nodes, PetscReal *weights, const PetscReal *f, PetscReal *in)
2660 {
2661   PetscInt i;
2662 
2663   PetscFunctionBegin;
2664   *in = 0.;
2665   for (i = 0; i < n; i++) *in += f[i] * f[i] * weights[i];
2666   PetscFunctionReturn(PETSC_SUCCESS);
2667 }
2668 
2669 /*@C
2670    PetscGaussLobattoLegendreElementLaplacianCreate - computes the Laplacian for a single 1d GLL element
2671 
2672    Not Collective
2673 
2674    Input Parameters:
2675 +  n - the number of GLL nodes
2676 .  nodes - the GLL nodes
2677 -  weights - the GLL weights
2678 
2679    Output Parameter:
2680 .  A - the stiffness element
2681 
2682    Level: beginner
2683 
2684    Notes:
2685    Destroy this with `PetscGaussLobattoLegendreElementLaplacianDestroy()`
2686 
2687    You can access entries in this array with AA[i][j] but in memory it is stored in contiguous memory, row oriented (the array is symmetric)
2688 
2689 .seealso: `PetscDTGaussLobattoLegendreQuadrature()`, `PetscGaussLobattoLegendreElementLaplacianDestroy()`
2690 @*/
2691 PetscErrorCode PetscGaussLobattoLegendreElementLaplacianCreate(PetscInt n, PetscReal *nodes, PetscReal *weights, PetscReal ***AA)
2692 {
2693   PetscReal      **A;
2694   const PetscReal *gllnodes = nodes;
2695   const PetscInt   p        = n - 1;
2696   PetscReal        z0, z1, z2 = -1, x, Lpj, Lpr;
2697   PetscInt         i, j, nn, r;
2698 
2699   PetscFunctionBegin;
2700   PetscCall(PetscMalloc1(n, &A));
2701   PetscCall(PetscMalloc1(n * n, &A[0]));
2702   for (i = 1; i < n; i++) A[i] = A[i - 1] + n;
2703 
2704   for (j = 1; j < p; j++) {
2705     x  = gllnodes[j];
2706     z0 = 1.;
2707     z1 = x;
2708     for (nn = 1; nn < p; nn++) {
2709       z2 = x * z1 * (2. * ((PetscReal)nn) + 1.) / (((PetscReal)nn) + 1.) - z0 * (((PetscReal)nn) / (((PetscReal)nn) + 1.));
2710       z0 = z1;
2711       z1 = z2;
2712     }
2713     Lpj = z2;
2714     for (r = 1; r < p; r++) {
2715       if (r == j) {
2716         A[j][j] = 2. / (3. * (1. - gllnodes[j] * gllnodes[j]) * Lpj * Lpj);
2717       } else {
2718         x  = gllnodes[r];
2719         z0 = 1.;
2720         z1 = x;
2721         for (nn = 1; nn < p; nn++) {
2722           z2 = x * z1 * (2. * ((PetscReal)nn) + 1.) / (((PetscReal)nn) + 1.) - z0 * (((PetscReal)nn) / (((PetscReal)nn) + 1.));
2723           z0 = z1;
2724           z1 = z2;
2725         }
2726         Lpr     = z2;
2727         A[r][j] = 4. / (((PetscReal)p) * (((PetscReal)p) + 1.) * Lpj * Lpr * (gllnodes[j] - gllnodes[r]) * (gllnodes[j] - gllnodes[r]));
2728       }
2729     }
2730   }
2731   for (j = 1; j < p + 1; j++) {
2732     x  = gllnodes[j];
2733     z0 = 1.;
2734     z1 = x;
2735     for (nn = 1; nn < p; nn++) {
2736       z2 = x * z1 * (2. * ((PetscReal)nn) + 1.) / (((PetscReal)nn) + 1.) - z0 * (((PetscReal)nn) / (((PetscReal)nn) + 1.));
2737       z0 = z1;
2738       z1 = z2;
2739     }
2740     Lpj     = z2;
2741     A[j][0] = 4. * PetscPowRealInt(-1., p) / (((PetscReal)p) * (((PetscReal)p) + 1.) * Lpj * (1. + gllnodes[j]) * (1. + gllnodes[j]));
2742     A[0][j] = A[j][0];
2743   }
2744   for (j = 0; j < p; j++) {
2745     x  = gllnodes[j];
2746     z0 = 1.;
2747     z1 = x;
2748     for (nn = 1; nn < p; nn++) {
2749       z2 = x * z1 * (2. * ((PetscReal)nn) + 1.) / (((PetscReal)nn) + 1.) - z0 * (((PetscReal)nn) / (((PetscReal)nn) + 1.));
2750       z0 = z1;
2751       z1 = z2;
2752     }
2753     Lpj = z2;
2754 
2755     A[p][j] = 4. / (((PetscReal)p) * (((PetscReal)p) + 1.) * Lpj * (1. - gllnodes[j]) * (1. - gllnodes[j]));
2756     A[j][p] = A[p][j];
2757   }
2758   A[0][0] = 0.5 + (((PetscReal)p) * (((PetscReal)p) + 1.) - 2.) / 6.;
2759   A[p][p] = A[0][0];
2760   *AA     = A;
2761   PetscFunctionReturn(PETSC_SUCCESS);
2762 }
2763 
2764 /*@C
2765    PetscGaussLobattoLegendreElementLaplacianDestroy - frees the Laplacian for a single 1d GLL element created with `PetscGaussLobattoLegendreElementLaplacianCreate()`
2766 
2767    Not Collective
2768 
2769    Input Parameters:
2770 +  n - the number of GLL nodes
2771 .  nodes - the GLL nodes
2772 .  weights - the GLL weightss
2773 -  A - the stiffness element
2774 
2775    Level: beginner
2776 
2777 .seealso: `PetscDTGaussLobattoLegendreQuadrature()`, `PetscGaussLobattoLegendreElementLaplacianCreate()`
2778 @*/
2779 PetscErrorCode PetscGaussLobattoLegendreElementLaplacianDestroy(PetscInt n, PetscReal *nodes, PetscReal *weights, PetscReal ***AA)
2780 {
2781   PetscFunctionBegin;
2782   PetscCall(PetscFree((*AA)[0]));
2783   PetscCall(PetscFree(*AA));
2784   *AA = NULL;
2785   PetscFunctionReturn(PETSC_SUCCESS);
2786 }
2787 
2788 /*@C
2789    PetscGaussLobattoLegendreElementGradientCreate - computes the gradient for a single 1d GLL element
2790 
2791    Not Collective
2792 
2793    Input Parameter:
2794 +  n - the number of GLL nodes
2795 .  nodes - the GLL nodes
2796 .  weights - the GLL weights
2797 
2798    Output Parameters:
2799 .  AA - the stiffness element
2800 -  AAT - the transpose of AA (pass in `NULL` if you do not need this array)
2801 
2802    Level: beginner
2803 
2804    Notes:
2805    Destroy this with `PetscGaussLobattoLegendreElementGradientDestroy()`
2806 
2807    You can access entries in these arrays with AA[i][j] but in memory it is stored in contiguous memory, row oriented
2808 
2809 .seealso: `PetscDTGaussLobattoLegendreQuadrature()`, `PetscGaussLobattoLegendreElementLaplacianDestroy()`, `PetscGaussLobattoLegendreElementGradientDestroy()`
2810 @*/
2811 PetscErrorCode PetscGaussLobattoLegendreElementGradientCreate(PetscInt n, PetscReal *nodes, PetscReal *weights, PetscReal ***AA, PetscReal ***AAT)
2812 {
2813   PetscReal      **A, **AT = NULL;
2814   const PetscReal *gllnodes = nodes;
2815   const PetscInt   p        = n - 1;
2816   PetscReal        Li, Lj, d0;
2817   PetscInt         i, j;
2818 
2819   PetscFunctionBegin;
2820   PetscCall(PetscMalloc1(n, &A));
2821   PetscCall(PetscMalloc1(n * n, &A[0]));
2822   for (i = 1; i < n; i++) A[i] = A[i - 1] + n;
2823 
2824   if (AAT) {
2825     PetscCall(PetscMalloc1(n, &AT));
2826     PetscCall(PetscMalloc1(n * n, &AT[0]));
2827     for (i = 1; i < n; i++) AT[i] = AT[i - 1] + n;
2828   }
2829 
2830   if (n == 1) A[0][0] = 0.;
2831   d0 = (PetscReal)p * ((PetscReal)p + 1.) / 4.;
2832   for (i = 0; i < n; i++) {
2833     for (j = 0; j < n; j++) {
2834       A[i][j] = 0.;
2835       PetscCall(PetscDTComputeJacobi(0., 0., p, gllnodes[i], &Li));
2836       PetscCall(PetscDTComputeJacobi(0., 0., p, gllnodes[j], &Lj));
2837       if (i != j) A[i][j] = Li / (Lj * (gllnodes[i] - gllnodes[j]));
2838       if ((j == i) && (i == 0)) A[i][j] = -d0;
2839       if (j == i && i == p) A[i][j] = d0;
2840       if (AT) AT[j][i] = A[i][j];
2841     }
2842   }
2843   if (AAT) *AAT = AT;
2844   *AA = A;
2845   PetscFunctionReturn(PETSC_SUCCESS);
2846 }
2847 
2848 /*@C
2849    PetscGaussLobattoLegendreElementGradientDestroy - frees the gradient for a single 1d GLL element obtained with `PetscGaussLobattoLegendreElementGradientCreate()`
2850 
2851    Not Collective
2852 
2853    Input Parameters:
2854 +  n - the number of GLL nodes
2855 .  nodes - the GLL nodes
2856 .  weights - the GLL weights
2857 .  AA - the stiffness element
2858 -  AAT - the transpose of the element
2859 
2860    Level: beginner
2861 
2862 .seealso: `PetscDTGaussLobattoLegendreQuadrature()`, `PetscGaussLobattoLegendreElementLaplacianCreate()`, `PetscGaussLobattoLegendreElementAdvectionCreate()`
2863 @*/
2864 PetscErrorCode PetscGaussLobattoLegendreElementGradientDestroy(PetscInt n, PetscReal *nodes, PetscReal *weights, PetscReal ***AA, PetscReal ***AAT)
2865 {
2866   PetscFunctionBegin;
2867   PetscCall(PetscFree((*AA)[0]));
2868   PetscCall(PetscFree(*AA));
2869   *AA = NULL;
2870   if (*AAT) {
2871     PetscCall(PetscFree((*AAT)[0]));
2872     PetscCall(PetscFree(*AAT));
2873     *AAT = NULL;
2874   }
2875   PetscFunctionReturn(PETSC_SUCCESS);
2876 }
2877 
2878 /*@C
2879    PetscGaussLobattoLegendreElementAdvectionCreate - computes the advection operator for a single 1d GLL element
2880 
2881    Not Collective
2882 
2883    Input Parameters:
2884 +  n - the number of GLL nodes
2885 .  nodes - the GLL nodes
2886 -  weights - the GLL weightss
2887 
2888    Output Parameter:
2889 .  AA - the stiffness element
2890 
2891    Level: beginner
2892 
2893    Notes:
2894    Destroy this with `PetscGaussLobattoLegendreElementAdvectionDestroy()`
2895 
2896    This is the same as the Gradient operator multiplied by the diagonal mass matrix
2897 
2898    You can access entries in this array with AA[i][j] but in memory it is stored in contiguous memory, row oriented
2899 
2900 .seealso: `PetscDTGaussLobattoLegendreQuadrature()`, `PetscGaussLobattoLegendreElementLaplacianCreate()`, `PetscGaussLobattoLegendreElementAdvectionDestroy()`
2901 @*/
2902 PetscErrorCode PetscGaussLobattoLegendreElementAdvectionCreate(PetscInt n, PetscReal *nodes, PetscReal *weights, PetscReal ***AA)
2903 {
2904   PetscReal      **D;
2905   const PetscReal *gllweights = weights;
2906   const PetscInt   glln       = n;
2907   PetscInt         i, j;
2908 
2909   PetscFunctionBegin;
2910   PetscCall(PetscGaussLobattoLegendreElementGradientCreate(n, nodes, weights, &D, NULL));
2911   for (i = 0; i < glln; i++) {
2912     for (j = 0; j < glln; j++) D[i][j] = gllweights[i] * D[i][j];
2913   }
2914   *AA = D;
2915   PetscFunctionReturn(PETSC_SUCCESS);
2916 }
2917 
2918 /*@C
2919    PetscGaussLobattoLegendreElementAdvectionDestroy - frees the advection stiffness for a single 1d GLL element created with `PetscGaussLobattoLegendreElementAdvectionCreate()`
2920 
2921    Not Collective
2922 
2923    Input Parameters:
2924 +  n - the number of GLL nodes
2925 .  nodes - the GLL nodes
2926 .  weights - the GLL weights
2927 -  A - advection
2928 
2929    Level: beginner
2930 
2931 .seealso: `PetscDTGaussLobattoLegendreQuadrature()`, `PetscGaussLobattoLegendreElementAdvectionCreate()`
2932 @*/
2933 PetscErrorCode PetscGaussLobattoLegendreElementAdvectionDestroy(PetscInt n, PetscReal *nodes, PetscReal *weights, PetscReal ***AA)
2934 {
2935   PetscFunctionBegin;
2936   PetscCall(PetscFree((*AA)[0]));
2937   PetscCall(PetscFree(*AA));
2938   *AA = NULL;
2939   PetscFunctionReturn(PETSC_SUCCESS);
2940 }
2941 
2942 PetscErrorCode PetscGaussLobattoLegendreElementMassCreate(PetscInt n, PetscReal *nodes, PetscReal *weights, PetscReal ***AA)
2943 {
2944   PetscReal      **A;
2945   const PetscReal *gllweights = weights;
2946   const PetscInt   glln       = n;
2947   PetscInt         i, j;
2948 
2949   PetscFunctionBegin;
2950   PetscCall(PetscMalloc1(glln, &A));
2951   PetscCall(PetscMalloc1(glln * glln, &A[0]));
2952   for (i = 1; i < glln; i++) A[i] = A[i - 1] + glln;
2953   if (glln == 1) A[0][0] = 0.;
2954   for (i = 0; i < glln; i++) {
2955     for (j = 0; j < glln; j++) {
2956       A[i][j] = 0.;
2957       if (j == i) A[i][j] = gllweights[i];
2958     }
2959   }
2960   *AA = A;
2961   PetscFunctionReturn(PETSC_SUCCESS);
2962 }
2963 
2964 PetscErrorCode PetscGaussLobattoLegendreElementMassDestroy(PetscInt n, PetscReal *nodes, PetscReal *weights, PetscReal ***AA)
2965 {
2966   PetscFunctionBegin;
2967   PetscCall(PetscFree((*AA)[0]));
2968   PetscCall(PetscFree(*AA));
2969   *AA = NULL;
2970   PetscFunctionReturn(PETSC_SUCCESS);
2971 }
2972 
2973 /*@
2974   PetscDTIndexToBary - convert an index into a barycentric coordinate.
2975 
2976   Input Parameters:
2977 + len - the desired length of the barycentric tuple (usually 1 more than the dimension it represents, so a barycentric coordinate in a triangle has length 3)
2978 . sum - the value that the sum of the barycentric coordinates (which will be non-negative integers) should sum to
2979 - index - the index to convert: should be >= 0 and < Binomial(len - 1 + sum, sum)
2980 
2981   Output Parameter:
2982 . coord - will be filled with the barycentric coordinate
2983 
2984   Level: beginner
2985 
2986   Note:
2987   The indices map to barycentric coordinates in lexicographic order, where the first index is the
2988   least significant and the last index is the most significant.
2989 
2990 .seealso: `PetscDTBaryToIndex()`
2991 @*/
2992 PetscErrorCode PetscDTIndexToBary(PetscInt len, PetscInt sum, PetscInt index, PetscInt coord[])
2993 {
2994   PetscInt c, d, s, total, subtotal, nexttotal;
2995 
2996   PetscFunctionBeginHot;
2997   PetscCheck(len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "length must be non-negative");
2998   PetscCheck(index >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "index must be non-negative");
2999   if (!len) {
3000     if (!sum && !index) PetscFunctionReturn(PETSC_SUCCESS);
3001     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid index or sum for length 0 barycentric coordinate");
3002   }
3003   for (c = 1, total = 1; c <= len; c++) {
3004     /* total is the number of ways to have a tuple of length c with sum */
3005     if (index < total) break;
3006     total = (total * (sum + c)) / c;
3007   }
3008   PetscCheck(c <= len, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "index out of range");
3009   for (d = c; d < len; d++) coord[d] = 0;
3010   for (s = 0, subtotal = 1, nexttotal = 1; c > 0;) {
3011     /* subtotal is the number of ways to have a tuple of length c with sum s */
3012     /* nexttotal is the number of ways to have a tuple of length c-1 with sum s */
3013     if ((index + subtotal) >= total) {
3014       coord[--c] = sum - s;
3015       index -= (total - subtotal);
3016       sum       = s;
3017       total     = nexttotal;
3018       subtotal  = 1;
3019       nexttotal = 1;
3020       s         = 0;
3021     } else {
3022       subtotal  = (subtotal * (c + s)) / (s + 1);
3023       nexttotal = (nexttotal * (c - 1 + s)) / (s + 1);
3024       s++;
3025     }
3026   }
3027   PetscFunctionReturn(PETSC_SUCCESS);
3028 }
3029 
3030 /*@
3031   PetscDTBaryToIndex - convert a barycentric coordinate to an index
3032 
3033   Input Parameters:
3034 + len - the desired length of the barycentric tuple (usually 1 more than the dimension it represents, so a barycentric coordinate in a triangle has length 3)
3035 . sum - the value that the sum of the barycentric coordinates (which will be non-negative integers) should sum to
3036 - coord - a barycentric coordinate with the given length and sum
3037 
3038   Output Parameter:
3039 . index - the unique index for the coordinate, >= 0 and < Binomial(len - 1 + sum, sum)
3040 
3041   Level: beginner
3042 
3043   Note:
3044   The indices map to barycentric coordinates in lexicographic order, where the first index is the
3045   least significant and the last index is the most significant.
3046 
3047 .seealso: `PetscDTIndexToBary`
3048 @*/
3049 PetscErrorCode PetscDTBaryToIndex(PetscInt len, PetscInt sum, const PetscInt coord[], PetscInt *index)
3050 {
3051   PetscInt c;
3052   PetscInt i;
3053   PetscInt total;
3054 
3055   PetscFunctionBeginHot;
3056   PetscCheck(len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "length must be non-negative");
3057   if (!len) {
3058     if (!sum) {
3059       *index = 0;
3060       PetscFunctionReturn(PETSC_SUCCESS);
3061     }
3062     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid index or sum for length 0 barycentric coordinate");
3063   }
3064   for (c = 1, total = 1; c < len; c++) total = (total * (sum + c)) / c;
3065   i = total - 1;
3066   c = len - 1;
3067   sum -= coord[c];
3068   while (sum > 0) {
3069     PetscInt subtotal;
3070     PetscInt s;
3071 
3072     for (s = 1, subtotal = 1; s < sum; s++) subtotal = (subtotal * (c + s)) / s;
3073     i -= subtotal;
3074     sum -= coord[--c];
3075   }
3076   *index = i;
3077   PetscFunctionReturn(PETSC_SUCCESS);
3078 }
3079 
3080 /*
3081 1) For each face type:
3082      For each transformation from outward to inward normal:
3083          Compute the permutation of quadrature points:
3084             Compute the quad point coordinates from each side and compare
3085 */
3086 PetscErrorCode PetscDTComputeFaceQuadPermutation(DMPolytopeType ct, PetscQuadrature quad, PetscInt *Np, IS *perm[])
3087 {
3088   const PetscReal *xq, *wq;
3089   PetscInt         dim, qdim, d, Na, o, Nq, q, qp;
3090 
3091   PetscFunctionBegin;
3092   dim = DMPolytopeTypeGetDim(ct);
3093   Na  = DMPolytopeTypeGetNumArrangments(ct);
3094   Na /= 2;
3095   PetscCall(PetscQuadratureGetData(quad, &qdim, NULL, &Nq, &xq, &wq));
3096   PetscCheck(dim >= 0 && dim < 3, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot compute quadrature permutation for cell type %s", DMPolytopeTypes[ct]);
3097   PetscCheck(dim == qdim, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Celltype %s dimension %" PetscInt_FMT " != %" PetscInt_FMT " quad dimension", DMPolytopeTypes[ct], dim, qdim);
3098   *Np = Na;
3099   PetscCall(PetscMalloc1(Na, perm));
3100   for (o = -1; o > -(Na + 1); --o) {
3101     DM        refdm;
3102     PetscInt *idx;
3103     PetscReal xi0[3] = {-1., -1., -1.}, v0[3], J[9], detJ, txq[3];
3104     PetscBool flg;
3105 
3106     PetscCall(DMPlexCreateReferenceCell(PETSC_COMM_SELF, ct, &refdm));
3107     PetscCall(DMPlexOrientPoint(refdm, 0, o));
3108     PetscCall(DMPlexComputeCellGeometryFEM(refdm, 0, NULL, v0, J, NULL, &detJ));
3109     PetscCall(PetscMalloc1(Nq, &idx));
3110     for (q = 0; q < Nq; ++q) {
3111       CoordinatesRefToReal(dim, dim, xi0, v0, J, &xq[q * dim], txq);
3112       for (qp = 0; qp < Nq; ++qp) {
3113         PetscReal diff = 0.;
3114 
3115         for (d = 0; d < dim; ++d) diff += PetscAbsReal(txq[d] - xq[qp * dim + d]);
3116         if (diff < PETSC_SMALL) break;
3117       }
3118       PetscCheck(qp < Nq, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Transformed quad point %" PetscInt_FMT " does not match another quad point", q);
3119       idx[q] = qp;
3120     }
3121     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, Nq, idx, PETSC_OWN_POINTER, &(*perm)[-(o + 1)]));
3122     PetscCall(ISGetInfo((*perm)[-(o + 1)], IS_PERMUTATION, IS_LOCAL, PETSC_TRUE, &flg));
3123     PetscCall(DMDestroy(&refdm));
3124     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Ordering for orientation %" PetscInt_FMT "was not a permutation", o);
3125   }
3126   PetscFunctionReturn(PETSC_SUCCESS);
3127 }
3128