xref: /petsc/src/mat/impls/baij/seq/baijsolvnat14.c (revision 9371c9d470a9602b6d10a8bf50c9b2280a79e45a)
12c733ed4SBarry Smith #include <../src/mat/impls/baij/seq/baij.h>
22c733ed4SBarry Smith #include <petsc/private/kernels/blockinvert.h>
32c733ed4SBarry Smith 
42c733ed4SBarry Smith /* Block operations are done by accessing one column at at time */
52c733ed4SBarry Smith /* Default MatSolve for block size 14 */
62c733ed4SBarry Smith 
7*9371c9d4SSatish Balay PetscErrorCode MatSolve_SeqBAIJ_14_NaturalOrdering(Mat A, Vec bb, Vec xx) {
82c733ed4SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
92c733ed4SBarry Smith   const PetscInt     n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2;
102c733ed4SBarry Smith   PetscInt           i, k, nz, idx, idt, m;
112c733ed4SBarry Smith   const MatScalar   *aa = a->a, *v;
122c733ed4SBarry Smith   PetscScalar        s[14];
132c733ed4SBarry Smith   PetscScalar       *x, xv;
142c733ed4SBarry Smith   const PetscScalar *b;
152c733ed4SBarry Smith 
162c733ed4SBarry Smith   PetscFunctionBegin;
179566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
189566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
192c733ed4SBarry Smith 
202c733ed4SBarry Smith   /* forward solve the lower triangular */
212c733ed4SBarry Smith   for (i = 0; i < n; i++) {
222c733ed4SBarry Smith     v           = aa + bs2 * ai[i];
232c733ed4SBarry Smith     vi          = aj + ai[i];
242c733ed4SBarry Smith     nz          = ai[i + 1] - ai[i];
252c733ed4SBarry Smith     idt         = bs * i;
26*9371c9d4SSatish Balay     x[idt]      = b[idt];
27*9371c9d4SSatish Balay     x[1 + idt]  = b[1 + idt];
28*9371c9d4SSatish Balay     x[2 + idt]  = b[2 + idt];
29*9371c9d4SSatish Balay     x[3 + idt]  = b[3 + idt];
30*9371c9d4SSatish Balay     x[4 + idt]  = b[4 + idt];
31*9371c9d4SSatish Balay     x[5 + idt]  = b[5 + idt];
32*9371c9d4SSatish Balay     x[6 + idt]  = b[6 + idt];
33*9371c9d4SSatish Balay     x[7 + idt]  = b[7 + idt];
34*9371c9d4SSatish Balay     x[8 + idt]  = b[8 + idt];
35*9371c9d4SSatish Balay     x[9 + idt]  = b[9 + idt];
36*9371c9d4SSatish Balay     x[10 + idt] = b[10 + idt];
37*9371c9d4SSatish Balay     x[11 + idt] = b[11 + idt];
38*9371c9d4SSatish Balay     x[12 + idt] = b[12 + idt];
39*9371c9d4SSatish Balay     x[13 + idt] = b[13 + idt];
402c733ed4SBarry Smith     for (m = 0; m < nz; m++) {
412c733ed4SBarry Smith       idx = bs * vi[m];
422c733ed4SBarry Smith       for (k = 0; k < bs; k++) {
432c733ed4SBarry Smith         xv = x[k + idx];
442c733ed4SBarry Smith         x[idt] -= v[0] * xv;
452c733ed4SBarry Smith         x[1 + idt] -= v[1] * xv;
462c733ed4SBarry Smith         x[2 + idt] -= v[2] * xv;
472c733ed4SBarry Smith         x[3 + idt] -= v[3] * xv;
482c733ed4SBarry Smith         x[4 + idt] -= v[4] * xv;
492c733ed4SBarry Smith         x[5 + idt] -= v[5] * xv;
502c733ed4SBarry Smith         x[6 + idt] -= v[6] * xv;
512c733ed4SBarry Smith         x[7 + idt] -= v[7] * xv;
522c733ed4SBarry Smith         x[8 + idt] -= v[8] * xv;
532c733ed4SBarry Smith         x[9 + idt] -= v[9] * xv;
542c733ed4SBarry Smith         x[10 + idt] -= v[10] * xv;
552c733ed4SBarry Smith         x[11 + idt] -= v[11] * xv;
562c733ed4SBarry Smith         x[12 + idt] -= v[12] * xv;
572c733ed4SBarry Smith         x[13 + idt] -= v[13] * xv;
582c733ed4SBarry Smith         v += 14;
592c733ed4SBarry Smith       }
602c733ed4SBarry Smith     }
612c733ed4SBarry Smith   }
622c733ed4SBarry Smith   /* backward solve the upper triangular */
632c733ed4SBarry Smith   for (i = n - 1; i >= 0; i--) {
642c733ed4SBarry Smith     v     = aa + bs2 * (adiag[i + 1] + 1);
652c733ed4SBarry Smith     vi    = aj + adiag[i + 1] + 1;
662c733ed4SBarry Smith     nz    = adiag[i] - adiag[i + 1] - 1;
672c733ed4SBarry Smith     idt   = bs * i;
68*9371c9d4SSatish Balay     s[0]  = x[idt];
69*9371c9d4SSatish Balay     s[1]  = x[1 + idt];
70*9371c9d4SSatish Balay     s[2]  = x[2 + idt];
71*9371c9d4SSatish Balay     s[3]  = x[3 + idt];
72*9371c9d4SSatish Balay     s[4]  = x[4 + idt];
73*9371c9d4SSatish Balay     s[5]  = x[5 + idt];
74*9371c9d4SSatish Balay     s[6]  = x[6 + idt];
75*9371c9d4SSatish Balay     s[7]  = x[7 + idt];
76*9371c9d4SSatish Balay     s[8]  = x[8 + idt];
77*9371c9d4SSatish Balay     s[9]  = x[9 + idt];
78*9371c9d4SSatish Balay     s[10] = x[10 + idt];
79*9371c9d4SSatish Balay     s[11] = x[11 + idt];
80*9371c9d4SSatish Balay     s[12] = x[12 + idt];
81*9371c9d4SSatish Balay     s[13] = x[13 + idt];
822c733ed4SBarry Smith 
832c733ed4SBarry Smith     for (m = 0; m < nz; m++) {
842c733ed4SBarry Smith       idx = bs * vi[m];
852c733ed4SBarry Smith       for (k = 0; k < bs; k++) {
862c733ed4SBarry Smith         xv = x[k + idx];
872c733ed4SBarry Smith         s[0] -= v[0] * xv;
882c733ed4SBarry Smith         s[1] -= v[1] * xv;
892c733ed4SBarry Smith         s[2] -= v[2] * xv;
902c733ed4SBarry Smith         s[3] -= v[3] * xv;
912c733ed4SBarry Smith         s[4] -= v[4] * xv;
922c733ed4SBarry Smith         s[5] -= v[5] * xv;
932c733ed4SBarry Smith         s[6] -= v[6] * xv;
942c733ed4SBarry Smith         s[7] -= v[7] * xv;
952c733ed4SBarry Smith         s[8] -= v[8] * xv;
962c733ed4SBarry Smith         s[9] -= v[9] * xv;
972c733ed4SBarry Smith         s[10] -= v[10] * xv;
982c733ed4SBarry Smith         s[11] -= v[11] * xv;
992c733ed4SBarry Smith         s[12] -= v[12] * xv;
1002c733ed4SBarry Smith         s[13] -= v[13] * xv;
1012c733ed4SBarry Smith         v += 14;
1022c733ed4SBarry Smith       }
1032c733ed4SBarry Smith     }
1049566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(x + idt, bs));
1052c733ed4SBarry Smith     for (k = 0; k < bs; k++) {
1062c733ed4SBarry Smith       x[idt] += v[0] * s[k];
1072c733ed4SBarry Smith       x[1 + idt] += v[1] * s[k];
1082c733ed4SBarry Smith       x[2 + idt] += v[2] * s[k];
1092c733ed4SBarry Smith       x[3 + idt] += v[3] * s[k];
1102c733ed4SBarry Smith       x[4 + idt] += v[4] * s[k];
1112c733ed4SBarry Smith       x[5 + idt] += v[5] * s[k];
1122c733ed4SBarry Smith       x[6 + idt] += v[6] * s[k];
1132c733ed4SBarry Smith       x[7 + idt] += v[7] * s[k];
1142c733ed4SBarry Smith       x[8 + idt] += v[8] * s[k];
1152c733ed4SBarry Smith       x[9 + idt] += v[9] * s[k];
1162c733ed4SBarry Smith       x[10 + idt] += v[10] * s[k];
1172c733ed4SBarry Smith       x[11 + idt] += v[11] * s[k];
1182c733ed4SBarry Smith       x[12 + idt] += v[12] * s[k];
1192c733ed4SBarry Smith       x[13 + idt] += v[13] * s[k];
1202c733ed4SBarry Smith       v += 14;
1212c733ed4SBarry Smith     }
1222c733ed4SBarry Smith   }
1239566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
1249566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
1259566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
1262c733ed4SBarry Smith   PetscFunctionReturn(0);
1272c733ed4SBarry Smith }
1282c733ed4SBarry Smith 
1292c733ed4SBarry Smith /* Block operations are done by accessing one column at at time */
1302c733ed4SBarry Smith /* Default MatSolve for block size 13 */
1312c733ed4SBarry Smith 
132*9371c9d4SSatish Balay PetscErrorCode MatSolve_SeqBAIJ_13_NaturalOrdering(Mat A, Vec bb, Vec xx) {
1332c733ed4SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1342c733ed4SBarry Smith   const PetscInt     n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2;
1352c733ed4SBarry Smith   PetscInt           i, k, nz, idx, idt, m;
1362c733ed4SBarry Smith   const MatScalar   *aa = a->a, *v;
1372c733ed4SBarry Smith   PetscScalar        s[13];
1382c733ed4SBarry Smith   PetscScalar       *x, xv;
1392c733ed4SBarry Smith   const PetscScalar *b;
1402c733ed4SBarry Smith 
1412c733ed4SBarry Smith   PetscFunctionBegin;
1429566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
1439566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
1442c733ed4SBarry Smith 
1452c733ed4SBarry Smith   /* forward solve the lower triangular */
1462c733ed4SBarry Smith   for (i = 0; i < n; i++) {
1472c733ed4SBarry Smith     v           = aa + bs2 * ai[i];
1482c733ed4SBarry Smith     vi          = aj + ai[i];
1492c733ed4SBarry Smith     nz          = ai[i + 1] - ai[i];
1502c733ed4SBarry Smith     idt         = bs * i;
151*9371c9d4SSatish Balay     x[idt]      = b[idt];
152*9371c9d4SSatish Balay     x[1 + idt]  = b[1 + idt];
153*9371c9d4SSatish Balay     x[2 + idt]  = b[2 + idt];
154*9371c9d4SSatish Balay     x[3 + idt]  = b[3 + idt];
155*9371c9d4SSatish Balay     x[4 + idt]  = b[4 + idt];
156*9371c9d4SSatish Balay     x[5 + idt]  = b[5 + idt];
157*9371c9d4SSatish Balay     x[6 + idt]  = b[6 + idt];
158*9371c9d4SSatish Balay     x[7 + idt]  = b[7 + idt];
159*9371c9d4SSatish Balay     x[8 + idt]  = b[8 + idt];
160*9371c9d4SSatish Balay     x[9 + idt]  = b[9 + idt];
161*9371c9d4SSatish Balay     x[10 + idt] = b[10 + idt];
162*9371c9d4SSatish Balay     x[11 + idt] = b[11 + idt];
163*9371c9d4SSatish Balay     x[12 + idt] = b[12 + idt];
1642c733ed4SBarry Smith     for (m = 0; m < nz; m++) {
1652c733ed4SBarry Smith       idx = bs * vi[m];
1662c733ed4SBarry Smith       for (k = 0; k < bs; k++) {
1672c733ed4SBarry Smith         xv = x[k + idx];
1682c733ed4SBarry Smith         x[idt] -= v[0] * xv;
1692c733ed4SBarry Smith         x[1 + idt] -= v[1] * xv;
1702c733ed4SBarry Smith         x[2 + idt] -= v[2] * xv;
1712c733ed4SBarry Smith         x[3 + idt] -= v[3] * xv;
1722c733ed4SBarry Smith         x[4 + idt] -= v[4] * xv;
1732c733ed4SBarry Smith         x[5 + idt] -= v[5] * xv;
1742c733ed4SBarry Smith         x[6 + idt] -= v[6] * xv;
1752c733ed4SBarry Smith         x[7 + idt] -= v[7] * xv;
1762c733ed4SBarry Smith         x[8 + idt] -= v[8] * xv;
1772c733ed4SBarry Smith         x[9 + idt] -= v[9] * xv;
1782c733ed4SBarry Smith         x[10 + idt] -= v[10] * xv;
1792c733ed4SBarry Smith         x[11 + idt] -= v[11] * xv;
1802c733ed4SBarry Smith         x[12 + idt] -= v[12] * xv;
1812c733ed4SBarry Smith         v += 13;
1822c733ed4SBarry Smith       }
1832c733ed4SBarry Smith     }
1842c733ed4SBarry Smith   }
1852c733ed4SBarry Smith   /* backward solve the upper triangular */
1862c733ed4SBarry Smith   for (i = n - 1; i >= 0; i--) {
1872c733ed4SBarry Smith     v     = aa + bs2 * (adiag[i + 1] + 1);
1882c733ed4SBarry Smith     vi    = aj + adiag[i + 1] + 1;
1892c733ed4SBarry Smith     nz    = adiag[i] - adiag[i + 1] - 1;
1902c733ed4SBarry Smith     idt   = bs * i;
191*9371c9d4SSatish Balay     s[0]  = x[idt];
192*9371c9d4SSatish Balay     s[1]  = x[1 + idt];
193*9371c9d4SSatish Balay     s[2]  = x[2 + idt];
194*9371c9d4SSatish Balay     s[3]  = x[3 + idt];
195*9371c9d4SSatish Balay     s[4]  = x[4 + idt];
196*9371c9d4SSatish Balay     s[5]  = x[5 + idt];
197*9371c9d4SSatish Balay     s[6]  = x[6 + idt];
198*9371c9d4SSatish Balay     s[7]  = x[7 + idt];
199*9371c9d4SSatish Balay     s[8]  = x[8 + idt];
200*9371c9d4SSatish Balay     s[9]  = x[9 + idt];
201*9371c9d4SSatish Balay     s[10] = x[10 + idt];
202*9371c9d4SSatish Balay     s[11] = x[11 + idt];
203*9371c9d4SSatish Balay     s[12] = x[12 + idt];
2042c733ed4SBarry Smith 
2052c733ed4SBarry Smith     for (m = 0; m < nz; m++) {
2062c733ed4SBarry Smith       idx = bs * vi[m];
2072c733ed4SBarry Smith       for (k = 0; k < bs; k++) {
2082c733ed4SBarry Smith         xv = x[k + idx];
2092c733ed4SBarry Smith         s[0] -= v[0] * xv;
2102c733ed4SBarry Smith         s[1] -= v[1] * xv;
2112c733ed4SBarry Smith         s[2] -= v[2] * xv;
2122c733ed4SBarry Smith         s[3] -= v[3] * xv;
2132c733ed4SBarry Smith         s[4] -= v[4] * xv;
2142c733ed4SBarry Smith         s[5] -= v[5] * xv;
2152c733ed4SBarry Smith         s[6] -= v[6] * xv;
2162c733ed4SBarry Smith         s[7] -= v[7] * xv;
2172c733ed4SBarry Smith         s[8] -= v[8] * xv;
2182c733ed4SBarry Smith         s[9] -= v[9] * xv;
2192c733ed4SBarry Smith         s[10] -= v[10] * xv;
2202c733ed4SBarry Smith         s[11] -= v[11] * xv;
2212c733ed4SBarry Smith         s[12] -= v[12] * xv;
2222c733ed4SBarry Smith         v += 13;
2232c733ed4SBarry Smith       }
2242c733ed4SBarry Smith     }
2259566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(x + idt, bs));
2262c733ed4SBarry Smith     for (k = 0; k < bs; k++) {
2272c733ed4SBarry Smith       x[idt] += v[0] * s[k];
2282c733ed4SBarry Smith       x[1 + idt] += v[1] * s[k];
2292c733ed4SBarry Smith       x[2 + idt] += v[2] * s[k];
2302c733ed4SBarry Smith       x[3 + idt] += v[3] * s[k];
2312c733ed4SBarry Smith       x[4 + idt] += v[4] * s[k];
2322c733ed4SBarry Smith       x[5 + idt] += v[5] * s[k];
2332c733ed4SBarry Smith       x[6 + idt] += v[6] * s[k];
2342c733ed4SBarry Smith       x[7 + idt] += v[7] * s[k];
2352c733ed4SBarry Smith       x[8 + idt] += v[8] * s[k];
2362c733ed4SBarry Smith       x[9 + idt] += v[9] * s[k];
2372c733ed4SBarry Smith       x[10 + idt] += v[10] * s[k];
2382c733ed4SBarry Smith       x[11 + idt] += v[11] * s[k];
2392c733ed4SBarry Smith       x[12 + idt] += v[12] * s[k];
2402c733ed4SBarry Smith       v += 13;
2412c733ed4SBarry Smith     }
2422c733ed4SBarry Smith   }
2439566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
2449566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
2459566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
2462c733ed4SBarry Smith   PetscFunctionReturn(0);
2472c733ed4SBarry Smith }
2482c733ed4SBarry Smith 
2492c733ed4SBarry Smith /* Block operations are done by accessing one column at at time */
2502c733ed4SBarry Smith /* Default MatSolve for block size 12 */
2512c733ed4SBarry Smith 
252*9371c9d4SSatish Balay PetscErrorCode MatSolve_SeqBAIJ_12_NaturalOrdering(Mat A, Vec bb, Vec xx) {
2532c733ed4SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
2542c733ed4SBarry Smith   const PetscInt     n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2;
2552c733ed4SBarry Smith   PetscInt           i, k, nz, idx, idt, m;
2562c733ed4SBarry Smith   const MatScalar   *aa = a->a, *v;
2572c733ed4SBarry Smith   PetscScalar        s[12];
2582c733ed4SBarry Smith   PetscScalar       *x, xv;
2592c733ed4SBarry Smith   const PetscScalar *b;
2602c733ed4SBarry Smith 
2612c733ed4SBarry Smith   PetscFunctionBegin;
2629566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
2639566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
2642c733ed4SBarry Smith 
2652c733ed4SBarry Smith   /* forward solve the lower triangular */
2662c733ed4SBarry Smith   for (i = 0; i < n; i++) {
2672c733ed4SBarry Smith     v           = aa + bs2 * ai[i];
2682c733ed4SBarry Smith     vi          = aj + ai[i];
2692c733ed4SBarry Smith     nz          = ai[i + 1] - ai[i];
2702c733ed4SBarry Smith     idt         = bs * i;
271*9371c9d4SSatish Balay     x[idt]      = b[idt];
272*9371c9d4SSatish Balay     x[1 + idt]  = b[1 + idt];
273*9371c9d4SSatish Balay     x[2 + idt]  = b[2 + idt];
274*9371c9d4SSatish Balay     x[3 + idt]  = b[3 + idt];
275*9371c9d4SSatish Balay     x[4 + idt]  = b[4 + idt];
276*9371c9d4SSatish Balay     x[5 + idt]  = b[5 + idt];
277*9371c9d4SSatish Balay     x[6 + idt]  = b[6 + idt];
278*9371c9d4SSatish Balay     x[7 + idt]  = b[7 + idt];
279*9371c9d4SSatish Balay     x[8 + idt]  = b[8 + idt];
280*9371c9d4SSatish Balay     x[9 + idt]  = b[9 + idt];
281*9371c9d4SSatish Balay     x[10 + idt] = b[10 + idt];
282*9371c9d4SSatish Balay     x[11 + idt] = b[11 + idt];
2832c733ed4SBarry Smith     for (m = 0; m < nz; m++) {
2842c733ed4SBarry Smith       idx = bs * vi[m];
2852c733ed4SBarry Smith       for (k = 0; k < bs; k++) {
2862c733ed4SBarry Smith         xv = x[k + idx];
2872c733ed4SBarry Smith         x[idt] -= v[0] * xv;
2882c733ed4SBarry Smith         x[1 + idt] -= v[1] * xv;
2892c733ed4SBarry Smith         x[2 + idt] -= v[2] * xv;
2902c733ed4SBarry Smith         x[3 + idt] -= v[3] * xv;
2912c733ed4SBarry Smith         x[4 + idt] -= v[4] * xv;
2922c733ed4SBarry Smith         x[5 + idt] -= v[5] * xv;
2932c733ed4SBarry Smith         x[6 + idt] -= v[6] * xv;
2942c733ed4SBarry Smith         x[7 + idt] -= v[7] * xv;
2952c733ed4SBarry Smith         x[8 + idt] -= v[8] * xv;
2962c733ed4SBarry Smith         x[9 + idt] -= v[9] * xv;
2972c733ed4SBarry Smith         x[10 + idt] -= v[10] * xv;
2982c733ed4SBarry Smith         x[11 + idt] -= v[11] * xv;
2992c733ed4SBarry Smith         v += 12;
3002c733ed4SBarry Smith       }
3012c733ed4SBarry Smith     }
3022c733ed4SBarry Smith   }
3032c733ed4SBarry Smith   /* backward solve the upper triangular */
3042c733ed4SBarry Smith   for (i = n - 1; i >= 0; i--) {
3052c733ed4SBarry Smith     v     = aa + bs2 * (adiag[i + 1] + 1);
3062c733ed4SBarry Smith     vi    = aj + adiag[i + 1] + 1;
3072c733ed4SBarry Smith     nz    = adiag[i] - adiag[i + 1] - 1;
3082c733ed4SBarry Smith     idt   = bs * i;
309*9371c9d4SSatish Balay     s[0]  = x[idt];
310*9371c9d4SSatish Balay     s[1]  = x[1 + idt];
311*9371c9d4SSatish Balay     s[2]  = x[2 + idt];
312*9371c9d4SSatish Balay     s[3]  = x[3 + idt];
313*9371c9d4SSatish Balay     s[4]  = x[4 + idt];
314*9371c9d4SSatish Balay     s[5]  = x[5 + idt];
315*9371c9d4SSatish Balay     s[6]  = x[6 + idt];
316*9371c9d4SSatish Balay     s[7]  = x[7 + idt];
317*9371c9d4SSatish Balay     s[8]  = x[8 + idt];
318*9371c9d4SSatish Balay     s[9]  = x[9 + idt];
319*9371c9d4SSatish Balay     s[10] = x[10 + idt];
320*9371c9d4SSatish Balay     s[11] = x[11 + idt];
3212c733ed4SBarry Smith 
3222c733ed4SBarry Smith     for (m = 0; m < nz; m++) {
3232c733ed4SBarry Smith       idx = bs * vi[m];
3242c733ed4SBarry Smith       for (k = 0; k < bs; k++) {
3252c733ed4SBarry Smith         xv = x[k + idx];
3262c733ed4SBarry Smith         s[0] -= v[0] * xv;
3272c733ed4SBarry Smith         s[1] -= v[1] * xv;
3282c733ed4SBarry Smith         s[2] -= v[2] * xv;
3292c733ed4SBarry Smith         s[3] -= v[3] * xv;
3302c733ed4SBarry Smith         s[4] -= v[4] * xv;
3312c733ed4SBarry Smith         s[5] -= v[5] * xv;
3322c733ed4SBarry Smith         s[6] -= v[6] * xv;
3332c733ed4SBarry Smith         s[7] -= v[7] * xv;
3342c733ed4SBarry Smith         s[8] -= v[8] * xv;
3352c733ed4SBarry Smith         s[9] -= v[9] * xv;
3362c733ed4SBarry Smith         s[10] -= v[10] * xv;
3372c733ed4SBarry Smith         s[11] -= v[11] * xv;
3382c733ed4SBarry Smith         v += 12;
3392c733ed4SBarry Smith       }
3402c733ed4SBarry Smith     }
3419566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(x + idt, bs));
3422c733ed4SBarry Smith     for (k = 0; k < bs; k++) {
3432c733ed4SBarry Smith       x[idt] += v[0] * s[k];
3442c733ed4SBarry Smith       x[1 + idt] += v[1] * s[k];
3452c733ed4SBarry Smith       x[2 + idt] += v[2] * s[k];
3462c733ed4SBarry Smith       x[3 + idt] += v[3] * s[k];
3472c733ed4SBarry Smith       x[4 + idt] += v[4] * s[k];
3482c733ed4SBarry Smith       x[5 + idt] += v[5] * s[k];
3492c733ed4SBarry Smith       x[6 + idt] += v[6] * s[k];
3502c733ed4SBarry Smith       x[7 + idt] += v[7] * s[k];
3512c733ed4SBarry Smith       x[8 + idt] += v[8] * s[k];
3522c733ed4SBarry Smith       x[9 + idt] += v[9] * s[k];
3532c733ed4SBarry Smith       x[10 + idt] += v[10] * s[k];
3542c733ed4SBarry Smith       x[11 + idt] += v[11] * s[k];
3552c733ed4SBarry Smith       v += 12;
3562c733ed4SBarry Smith     }
3572c733ed4SBarry Smith   }
3589566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
3599566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
3609566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
3612c733ed4SBarry Smith   PetscFunctionReturn(0);
3622c733ed4SBarry Smith }
363