12c733ed4SBarry Smith #include <../src/mat/impls/baij/seq/baij.h> 22c733ed4SBarry Smith #include <petsc/private/kernels/blockinvert.h> 32c733ed4SBarry Smith 42c733ed4SBarry Smith /* Block operations are done by accessing one column at at time */ 52c733ed4SBarry Smith /* Default MatSolve for block size 14 */ 62c733ed4SBarry Smith 7*9371c9d4SSatish Balay PetscErrorCode MatSolve_SeqBAIJ_14_NaturalOrdering(Mat A, Vec bb, Vec xx) { 82c733ed4SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 92c733ed4SBarry Smith const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2; 102c733ed4SBarry Smith PetscInt i, k, nz, idx, idt, m; 112c733ed4SBarry Smith const MatScalar *aa = a->a, *v; 122c733ed4SBarry Smith PetscScalar s[14]; 132c733ed4SBarry Smith PetscScalar *x, xv; 142c733ed4SBarry Smith const PetscScalar *b; 152c733ed4SBarry Smith 162c733ed4SBarry Smith PetscFunctionBegin; 179566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 189566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 192c733ed4SBarry Smith 202c733ed4SBarry Smith /* forward solve the lower triangular */ 212c733ed4SBarry Smith for (i = 0; i < n; i++) { 222c733ed4SBarry Smith v = aa + bs2 * ai[i]; 232c733ed4SBarry Smith vi = aj + ai[i]; 242c733ed4SBarry Smith nz = ai[i + 1] - ai[i]; 252c733ed4SBarry Smith idt = bs * i; 26*9371c9d4SSatish Balay x[idt] = b[idt]; 27*9371c9d4SSatish Balay x[1 + idt] = b[1 + idt]; 28*9371c9d4SSatish Balay x[2 + idt] = b[2 + idt]; 29*9371c9d4SSatish Balay x[3 + idt] = b[3 + idt]; 30*9371c9d4SSatish Balay x[4 + idt] = b[4 + idt]; 31*9371c9d4SSatish Balay x[5 + idt] = b[5 + idt]; 32*9371c9d4SSatish Balay x[6 + idt] = b[6 + idt]; 33*9371c9d4SSatish Balay x[7 + idt] = b[7 + idt]; 34*9371c9d4SSatish Balay x[8 + idt] = b[8 + idt]; 35*9371c9d4SSatish Balay x[9 + idt] = b[9 + idt]; 36*9371c9d4SSatish Balay x[10 + idt] = b[10 + idt]; 37*9371c9d4SSatish Balay x[11 + idt] = b[11 + idt]; 38*9371c9d4SSatish Balay x[12 + idt] = b[12 + idt]; 39*9371c9d4SSatish Balay x[13 + idt] = b[13 + idt]; 402c733ed4SBarry Smith for (m = 0; m < nz; m++) { 412c733ed4SBarry Smith idx = bs * vi[m]; 422c733ed4SBarry Smith for (k = 0; k < bs; k++) { 432c733ed4SBarry Smith xv = x[k + idx]; 442c733ed4SBarry Smith x[idt] -= v[0] * xv; 452c733ed4SBarry Smith x[1 + idt] -= v[1] * xv; 462c733ed4SBarry Smith x[2 + idt] -= v[2] * xv; 472c733ed4SBarry Smith x[3 + idt] -= v[3] * xv; 482c733ed4SBarry Smith x[4 + idt] -= v[4] * xv; 492c733ed4SBarry Smith x[5 + idt] -= v[5] * xv; 502c733ed4SBarry Smith x[6 + idt] -= v[6] * xv; 512c733ed4SBarry Smith x[7 + idt] -= v[7] * xv; 522c733ed4SBarry Smith x[8 + idt] -= v[8] * xv; 532c733ed4SBarry Smith x[9 + idt] -= v[9] * xv; 542c733ed4SBarry Smith x[10 + idt] -= v[10] * xv; 552c733ed4SBarry Smith x[11 + idt] -= v[11] * xv; 562c733ed4SBarry Smith x[12 + idt] -= v[12] * xv; 572c733ed4SBarry Smith x[13 + idt] -= v[13] * xv; 582c733ed4SBarry Smith v += 14; 592c733ed4SBarry Smith } 602c733ed4SBarry Smith } 612c733ed4SBarry Smith } 622c733ed4SBarry Smith /* backward solve the upper triangular */ 632c733ed4SBarry Smith for (i = n - 1; i >= 0; i--) { 642c733ed4SBarry Smith v = aa + bs2 * (adiag[i + 1] + 1); 652c733ed4SBarry Smith vi = aj + adiag[i + 1] + 1; 662c733ed4SBarry Smith nz = adiag[i] - adiag[i + 1] - 1; 672c733ed4SBarry Smith idt = bs * i; 68*9371c9d4SSatish Balay s[0] = x[idt]; 69*9371c9d4SSatish Balay s[1] = x[1 + idt]; 70*9371c9d4SSatish Balay s[2] = x[2 + idt]; 71*9371c9d4SSatish Balay s[3] = x[3 + idt]; 72*9371c9d4SSatish Balay s[4] = x[4 + idt]; 73*9371c9d4SSatish Balay s[5] = x[5 + idt]; 74*9371c9d4SSatish Balay s[6] = x[6 + idt]; 75*9371c9d4SSatish Balay s[7] = x[7 + idt]; 76*9371c9d4SSatish Balay s[8] = x[8 + idt]; 77*9371c9d4SSatish Balay s[9] = x[9 + idt]; 78*9371c9d4SSatish Balay s[10] = x[10 + idt]; 79*9371c9d4SSatish Balay s[11] = x[11 + idt]; 80*9371c9d4SSatish Balay s[12] = x[12 + idt]; 81*9371c9d4SSatish Balay s[13] = x[13 + idt]; 822c733ed4SBarry Smith 832c733ed4SBarry Smith for (m = 0; m < nz; m++) { 842c733ed4SBarry Smith idx = bs * vi[m]; 852c733ed4SBarry Smith for (k = 0; k < bs; k++) { 862c733ed4SBarry Smith xv = x[k + idx]; 872c733ed4SBarry Smith s[0] -= v[0] * xv; 882c733ed4SBarry Smith s[1] -= v[1] * xv; 892c733ed4SBarry Smith s[2] -= v[2] * xv; 902c733ed4SBarry Smith s[3] -= v[3] * xv; 912c733ed4SBarry Smith s[4] -= v[4] * xv; 922c733ed4SBarry Smith s[5] -= v[5] * xv; 932c733ed4SBarry Smith s[6] -= v[6] * xv; 942c733ed4SBarry Smith s[7] -= v[7] * xv; 952c733ed4SBarry Smith s[8] -= v[8] * xv; 962c733ed4SBarry Smith s[9] -= v[9] * xv; 972c733ed4SBarry Smith s[10] -= v[10] * xv; 982c733ed4SBarry Smith s[11] -= v[11] * xv; 992c733ed4SBarry Smith s[12] -= v[12] * xv; 1002c733ed4SBarry Smith s[13] -= v[13] * xv; 1012c733ed4SBarry Smith v += 14; 1022c733ed4SBarry Smith } 1032c733ed4SBarry Smith } 1049566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(x + idt, bs)); 1052c733ed4SBarry Smith for (k = 0; k < bs; k++) { 1062c733ed4SBarry Smith x[idt] += v[0] * s[k]; 1072c733ed4SBarry Smith x[1 + idt] += v[1] * s[k]; 1082c733ed4SBarry Smith x[2 + idt] += v[2] * s[k]; 1092c733ed4SBarry Smith x[3 + idt] += v[3] * s[k]; 1102c733ed4SBarry Smith x[4 + idt] += v[4] * s[k]; 1112c733ed4SBarry Smith x[5 + idt] += v[5] * s[k]; 1122c733ed4SBarry Smith x[6 + idt] += v[6] * s[k]; 1132c733ed4SBarry Smith x[7 + idt] += v[7] * s[k]; 1142c733ed4SBarry Smith x[8 + idt] += v[8] * s[k]; 1152c733ed4SBarry Smith x[9 + idt] += v[9] * s[k]; 1162c733ed4SBarry Smith x[10 + idt] += v[10] * s[k]; 1172c733ed4SBarry Smith x[11 + idt] += v[11] * s[k]; 1182c733ed4SBarry Smith x[12 + idt] += v[12] * s[k]; 1192c733ed4SBarry Smith x[13 + idt] += v[13] * s[k]; 1202c733ed4SBarry Smith v += 14; 1212c733ed4SBarry Smith } 1222c733ed4SBarry Smith } 1239566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 1249566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 1259566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n)); 1262c733ed4SBarry Smith PetscFunctionReturn(0); 1272c733ed4SBarry Smith } 1282c733ed4SBarry Smith 1292c733ed4SBarry Smith /* Block operations are done by accessing one column at at time */ 1302c733ed4SBarry Smith /* Default MatSolve for block size 13 */ 1312c733ed4SBarry Smith 132*9371c9d4SSatish Balay PetscErrorCode MatSolve_SeqBAIJ_13_NaturalOrdering(Mat A, Vec bb, Vec xx) { 1332c733ed4SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1342c733ed4SBarry Smith const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2; 1352c733ed4SBarry Smith PetscInt i, k, nz, idx, idt, m; 1362c733ed4SBarry Smith const MatScalar *aa = a->a, *v; 1372c733ed4SBarry Smith PetscScalar s[13]; 1382c733ed4SBarry Smith PetscScalar *x, xv; 1392c733ed4SBarry Smith const PetscScalar *b; 1402c733ed4SBarry Smith 1412c733ed4SBarry Smith PetscFunctionBegin; 1429566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 1439566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 1442c733ed4SBarry Smith 1452c733ed4SBarry Smith /* forward solve the lower triangular */ 1462c733ed4SBarry Smith for (i = 0; i < n; i++) { 1472c733ed4SBarry Smith v = aa + bs2 * ai[i]; 1482c733ed4SBarry Smith vi = aj + ai[i]; 1492c733ed4SBarry Smith nz = ai[i + 1] - ai[i]; 1502c733ed4SBarry Smith idt = bs * i; 151*9371c9d4SSatish Balay x[idt] = b[idt]; 152*9371c9d4SSatish Balay x[1 + idt] = b[1 + idt]; 153*9371c9d4SSatish Balay x[2 + idt] = b[2 + idt]; 154*9371c9d4SSatish Balay x[3 + idt] = b[3 + idt]; 155*9371c9d4SSatish Balay x[4 + idt] = b[4 + idt]; 156*9371c9d4SSatish Balay x[5 + idt] = b[5 + idt]; 157*9371c9d4SSatish Balay x[6 + idt] = b[6 + idt]; 158*9371c9d4SSatish Balay x[7 + idt] = b[7 + idt]; 159*9371c9d4SSatish Balay x[8 + idt] = b[8 + idt]; 160*9371c9d4SSatish Balay x[9 + idt] = b[9 + idt]; 161*9371c9d4SSatish Balay x[10 + idt] = b[10 + idt]; 162*9371c9d4SSatish Balay x[11 + idt] = b[11 + idt]; 163*9371c9d4SSatish Balay x[12 + idt] = b[12 + idt]; 1642c733ed4SBarry Smith for (m = 0; m < nz; m++) { 1652c733ed4SBarry Smith idx = bs * vi[m]; 1662c733ed4SBarry Smith for (k = 0; k < bs; k++) { 1672c733ed4SBarry Smith xv = x[k + idx]; 1682c733ed4SBarry Smith x[idt] -= v[0] * xv; 1692c733ed4SBarry Smith x[1 + idt] -= v[1] * xv; 1702c733ed4SBarry Smith x[2 + idt] -= v[2] * xv; 1712c733ed4SBarry Smith x[3 + idt] -= v[3] * xv; 1722c733ed4SBarry Smith x[4 + idt] -= v[4] * xv; 1732c733ed4SBarry Smith x[5 + idt] -= v[5] * xv; 1742c733ed4SBarry Smith x[6 + idt] -= v[6] * xv; 1752c733ed4SBarry Smith x[7 + idt] -= v[7] * xv; 1762c733ed4SBarry Smith x[8 + idt] -= v[8] * xv; 1772c733ed4SBarry Smith x[9 + idt] -= v[9] * xv; 1782c733ed4SBarry Smith x[10 + idt] -= v[10] * xv; 1792c733ed4SBarry Smith x[11 + idt] -= v[11] * xv; 1802c733ed4SBarry Smith x[12 + idt] -= v[12] * xv; 1812c733ed4SBarry Smith v += 13; 1822c733ed4SBarry Smith } 1832c733ed4SBarry Smith } 1842c733ed4SBarry Smith } 1852c733ed4SBarry Smith /* backward solve the upper triangular */ 1862c733ed4SBarry Smith for (i = n - 1; i >= 0; i--) { 1872c733ed4SBarry Smith v = aa + bs2 * (adiag[i + 1] + 1); 1882c733ed4SBarry Smith vi = aj + adiag[i + 1] + 1; 1892c733ed4SBarry Smith nz = adiag[i] - adiag[i + 1] - 1; 1902c733ed4SBarry Smith idt = bs * i; 191*9371c9d4SSatish Balay s[0] = x[idt]; 192*9371c9d4SSatish Balay s[1] = x[1 + idt]; 193*9371c9d4SSatish Balay s[2] = x[2 + idt]; 194*9371c9d4SSatish Balay s[3] = x[3 + idt]; 195*9371c9d4SSatish Balay s[4] = x[4 + idt]; 196*9371c9d4SSatish Balay s[5] = x[5 + idt]; 197*9371c9d4SSatish Balay s[6] = x[6 + idt]; 198*9371c9d4SSatish Balay s[7] = x[7 + idt]; 199*9371c9d4SSatish Balay s[8] = x[8 + idt]; 200*9371c9d4SSatish Balay s[9] = x[9 + idt]; 201*9371c9d4SSatish Balay s[10] = x[10 + idt]; 202*9371c9d4SSatish Balay s[11] = x[11 + idt]; 203*9371c9d4SSatish Balay s[12] = x[12 + idt]; 2042c733ed4SBarry Smith 2052c733ed4SBarry Smith for (m = 0; m < nz; m++) { 2062c733ed4SBarry Smith idx = bs * vi[m]; 2072c733ed4SBarry Smith for (k = 0; k < bs; k++) { 2082c733ed4SBarry Smith xv = x[k + idx]; 2092c733ed4SBarry Smith s[0] -= v[0] * xv; 2102c733ed4SBarry Smith s[1] -= v[1] * xv; 2112c733ed4SBarry Smith s[2] -= v[2] * xv; 2122c733ed4SBarry Smith s[3] -= v[3] * xv; 2132c733ed4SBarry Smith s[4] -= v[4] * xv; 2142c733ed4SBarry Smith s[5] -= v[5] * xv; 2152c733ed4SBarry Smith s[6] -= v[6] * xv; 2162c733ed4SBarry Smith s[7] -= v[7] * xv; 2172c733ed4SBarry Smith s[8] -= v[8] * xv; 2182c733ed4SBarry Smith s[9] -= v[9] * xv; 2192c733ed4SBarry Smith s[10] -= v[10] * xv; 2202c733ed4SBarry Smith s[11] -= v[11] * xv; 2212c733ed4SBarry Smith s[12] -= v[12] * xv; 2222c733ed4SBarry Smith v += 13; 2232c733ed4SBarry Smith } 2242c733ed4SBarry Smith } 2259566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(x + idt, bs)); 2262c733ed4SBarry Smith for (k = 0; k < bs; k++) { 2272c733ed4SBarry Smith x[idt] += v[0] * s[k]; 2282c733ed4SBarry Smith x[1 + idt] += v[1] * s[k]; 2292c733ed4SBarry Smith x[2 + idt] += v[2] * s[k]; 2302c733ed4SBarry Smith x[3 + idt] += v[3] * s[k]; 2312c733ed4SBarry Smith x[4 + idt] += v[4] * s[k]; 2322c733ed4SBarry Smith x[5 + idt] += v[5] * s[k]; 2332c733ed4SBarry Smith x[6 + idt] += v[6] * s[k]; 2342c733ed4SBarry Smith x[7 + idt] += v[7] * s[k]; 2352c733ed4SBarry Smith x[8 + idt] += v[8] * s[k]; 2362c733ed4SBarry Smith x[9 + idt] += v[9] * s[k]; 2372c733ed4SBarry Smith x[10 + idt] += v[10] * s[k]; 2382c733ed4SBarry Smith x[11 + idt] += v[11] * s[k]; 2392c733ed4SBarry Smith x[12 + idt] += v[12] * s[k]; 2402c733ed4SBarry Smith v += 13; 2412c733ed4SBarry Smith } 2422c733ed4SBarry Smith } 2439566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 2449566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 2459566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n)); 2462c733ed4SBarry Smith PetscFunctionReturn(0); 2472c733ed4SBarry Smith } 2482c733ed4SBarry Smith 2492c733ed4SBarry Smith /* Block operations are done by accessing one column at at time */ 2502c733ed4SBarry Smith /* Default MatSolve for block size 12 */ 2512c733ed4SBarry Smith 252*9371c9d4SSatish Balay PetscErrorCode MatSolve_SeqBAIJ_12_NaturalOrdering(Mat A, Vec bb, Vec xx) { 2532c733ed4SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2542c733ed4SBarry Smith const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2; 2552c733ed4SBarry Smith PetscInt i, k, nz, idx, idt, m; 2562c733ed4SBarry Smith const MatScalar *aa = a->a, *v; 2572c733ed4SBarry Smith PetscScalar s[12]; 2582c733ed4SBarry Smith PetscScalar *x, xv; 2592c733ed4SBarry Smith const PetscScalar *b; 2602c733ed4SBarry Smith 2612c733ed4SBarry Smith PetscFunctionBegin; 2629566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 2639566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 2642c733ed4SBarry Smith 2652c733ed4SBarry Smith /* forward solve the lower triangular */ 2662c733ed4SBarry Smith for (i = 0; i < n; i++) { 2672c733ed4SBarry Smith v = aa + bs2 * ai[i]; 2682c733ed4SBarry Smith vi = aj + ai[i]; 2692c733ed4SBarry Smith nz = ai[i + 1] - ai[i]; 2702c733ed4SBarry Smith idt = bs * i; 271*9371c9d4SSatish Balay x[idt] = b[idt]; 272*9371c9d4SSatish Balay x[1 + idt] = b[1 + idt]; 273*9371c9d4SSatish Balay x[2 + idt] = b[2 + idt]; 274*9371c9d4SSatish Balay x[3 + idt] = b[3 + idt]; 275*9371c9d4SSatish Balay x[4 + idt] = b[4 + idt]; 276*9371c9d4SSatish Balay x[5 + idt] = b[5 + idt]; 277*9371c9d4SSatish Balay x[6 + idt] = b[6 + idt]; 278*9371c9d4SSatish Balay x[7 + idt] = b[7 + idt]; 279*9371c9d4SSatish Balay x[8 + idt] = b[8 + idt]; 280*9371c9d4SSatish Balay x[9 + idt] = b[9 + idt]; 281*9371c9d4SSatish Balay x[10 + idt] = b[10 + idt]; 282*9371c9d4SSatish Balay x[11 + idt] = b[11 + idt]; 2832c733ed4SBarry Smith for (m = 0; m < nz; m++) { 2842c733ed4SBarry Smith idx = bs * vi[m]; 2852c733ed4SBarry Smith for (k = 0; k < bs; k++) { 2862c733ed4SBarry Smith xv = x[k + idx]; 2872c733ed4SBarry Smith x[idt] -= v[0] * xv; 2882c733ed4SBarry Smith x[1 + idt] -= v[1] * xv; 2892c733ed4SBarry Smith x[2 + idt] -= v[2] * xv; 2902c733ed4SBarry Smith x[3 + idt] -= v[3] * xv; 2912c733ed4SBarry Smith x[4 + idt] -= v[4] * xv; 2922c733ed4SBarry Smith x[5 + idt] -= v[5] * xv; 2932c733ed4SBarry Smith x[6 + idt] -= v[6] * xv; 2942c733ed4SBarry Smith x[7 + idt] -= v[7] * xv; 2952c733ed4SBarry Smith x[8 + idt] -= v[8] * xv; 2962c733ed4SBarry Smith x[9 + idt] -= v[9] * xv; 2972c733ed4SBarry Smith x[10 + idt] -= v[10] * xv; 2982c733ed4SBarry Smith x[11 + idt] -= v[11] * xv; 2992c733ed4SBarry Smith v += 12; 3002c733ed4SBarry Smith } 3012c733ed4SBarry Smith } 3022c733ed4SBarry Smith } 3032c733ed4SBarry Smith /* backward solve the upper triangular */ 3042c733ed4SBarry Smith for (i = n - 1; i >= 0; i--) { 3052c733ed4SBarry Smith v = aa + bs2 * (adiag[i + 1] + 1); 3062c733ed4SBarry Smith vi = aj + adiag[i + 1] + 1; 3072c733ed4SBarry Smith nz = adiag[i] - adiag[i + 1] - 1; 3082c733ed4SBarry Smith idt = bs * i; 309*9371c9d4SSatish Balay s[0] = x[idt]; 310*9371c9d4SSatish Balay s[1] = x[1 + idt]; 311*9371c9d4SSatish Balay s[2] = x[2 + idt]; 312*9371c9d4SSatish Balay s[3] = x[3 + idt]; 313*9371c9d4SSatish Balay s[4] = x[4 + idt]; 314*9371c9d4SSatish Balay s[5] = x[5 + idt]; 315*9371c9d4SSatish Balay s[6] = x[6 + idt]; 316*9371c9d4SSatish Balay s[7] = x[7 + idt]; 317*9371c9d4SSatish Balay s[8] = x[8 + idt]; 318*9371c9d4SSatish Balay s[9] = x[9 + idt]; 319*9371c9d4SSatish Balay s[10] = x[10 + idt]; 320*9371c9d4SSatish Balay s[11] = x[11 + idt]; 3212c733ed4SBarry Smith 3222c733ed4SBarry Smith for (m = 0; m < nz; m++) { 3232c733ed4SBarry Smith idx = bs * vi[m]; 3242c733ed4SBarry Smith for (k = 0; k < bs; k++) { 3252c733ed4SBarry Smith xv = x[k + idx]; 3262c733ed4SBarry Smith s[0] -= v[0] * xv; 3272c733ed4SBarry Smith s[1] -= v[1] * xv; 3282c733ed4SBarry Smith s[2] -= v[2] * xv; 3292c733ed4SBarry Smith s[3] -= v[3] * xv; 3302c733ed4SBarry Smith s[4] -= v[4] * xv; 3312c733ed4SBarry Smith s[5] -= v[5] * xv; 3322c733ed4SBarry Smith s[6] -= v[6] * xv; 3332c733ed4SBarry Smith s[7] -= v[7] * xv; 3342c733ed4SBarry Smith s[8] -= v[8] * xv; 3352c733ed4SBarry Smith s[9] -= v[9] * xv; 3362c733ed4SBarry Smith s[10] -= v[10] * xv; 3372c733ed4SBarry Smith s[11] -= v[11] * xv; 3382c733ed4SBarry Smith v += 12; 3392c733ed4SBarry Smith } 3402c733ed4SBarry Smith } 3419566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(x + idt, bs)); 3422c733ed4SBarry Smith for (k = 0; k < bs; k++) { 3432c733ed4SBarry Smith x[idt] += v[0] * s[k]; 3442c733ed4SBarry Smith x[1 + idt] += v[1] * s[k]; 3452c733ed4SBarry Smith x[2 + idt] += v[2] * s[k]; 3462c733ed4SBarry Smith x[3 + idt] += v[3] * s[k]; 3472c733ed4SBarry Smith x[4 + idt] += v[4] * s[k]; 3482c733ed4SBarry Smith x[5 + idt] += v[5] * s[k]; 3492c733ed4SBarry Smith x[6 + idt] += v[6] * s[k]; 3502c733ed4SBarry Smith x[7 + idt] += v[7] * s[k]; 3512c733ed4SBarry Smith x[8 + idt] += v[8] * s[k]; 3522c733ed4SBarry Smith x[9 + idt] += v[9] * s[k]; 3532c733ed4SBarry Smith x[10 + idt] += v[10] * s[k]; 3542c733ed4SBarry Smith x[11 + idt] += v[11] * s[k]; 3552c733ed4SBarry Smith v += 12; 3562c733ed4SBarry Smith } 3572c733ed4SBarry Smith } 3589566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 3599566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 3609566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n)); 3612c733ed4SBarry Smith PetscFunctionReturn(0); 3622c733ed4SBarry Smith } 363