12c733ed4SBarry Smith #include <../src/mat/impls/baij/seq/baij.h> 22c733ed4SBarry Smith #include <petsc/private/kernels/blockinvert.h> 32c733ed4SBarry Smith 4*15229ffcSPierre Jolivet /* Block operations are done by accessing one column at a time */ 52c733ed4SBarry Smith /* Default MatSolve for block size 14 */ 62c733ed4SBarry Smith 7d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_14_NaturalOrdering(Mat A, Vec bb, Vec xx) 8d71ae5a4SJacob Faibussowitsch { 92c733ed4SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 102c733ed4SBarry Smith const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2; 112c733ed4SBarry Smith PetscInt i, k, nz, idx, idt, m; 122c733ed4SBarry Smith const MatScalar *aa = a->a, *v; 132c733ed4SBarry Smith PetscScalar s[14]; 142c733ed4SBarry Smith PetscScalar *x, xv; 152c733ed4SBarry Smith const PetscScalar *b; 162c733ed4SBarry Smith 172c733ed4SBarry Smith PetscFunctionBegin; 189566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 199566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 202c733ed4SBarry Smith 212c733ed4SBarry Smith /* forward solve the lower triangular */ 222c733ed4SBarry Smith for (i = 0; i < n; i++) { 232c733ed4SBarry Smith v = aa + bs2 * ai[i]; 242c733ed4SBarry Smith vi = aj + ai[i]; 252c733ed4SBarry Smith nz = ai[i + 1] - ai[i]; 262c733ed4SBarry Smith idt = bs * i; 279371c9d4SSatish Balay x[idt] = b[idt]; 289371c9d4SSatish Balay x[1 + idt] = b[1 + idt]; 299371c9d4SSatish Balay x[2 + idt] = b[2 + idt]; 309371c9d4SSatish Balay x[3 + idt] = b[3 + idt]; 319371c9d4SSatish Balay x[4 + idt] = b[4 + idt]; 329371c9d4SSatish Balay x[5 + idt] = b[5 + idt]; 339371c9d4SSatish Balay x[6 + idt] = b[6 + idt]; 349371c9d4SSatish Balay x[7 + idt] = b[7 + idt]; 359371c9d4SSatish Balay x[8 + idt] = b[8 + idt]; 369371c9d4SSatish Balay x[9 + idt] = b[9 + idt]; 379371c9d4SSatish Balay x[10 + idt] = b[10 + idt]; 389371c9d4SSatish Balay x[11 + idt] = b[11 + idt]; 399371c9d4SSatish Balay x[12 + idt] = b[12 + idt]; 409371c9d4SSatish Balay x[13 + idt] = b[13 + idt]; 412c733ed4SBarry Smith for (m = 0; m < nz; m++) { 422c733ed4SBarry Smith idx = bs * vi[m]; 432c733ed4SBarry Smith for (k = 0; k < bs; k++) { 442c733ed4SBarry Smith xv = x[k + idx]; 452c733ed4SBarry Smith x[idt] -= v[0] * xv; 462c733ed4SBarry Smith x[1 + idt] -= v[1] * xv; 472c733ed4SBarry Smith x[2 + idt] -= v[2] * xv; 482c733ed4SBarry Smith x[3 + idt] -= v[3] * xv; 492c733ed4SBarry Smith x[4 + idt] -= v[4] * xv; 502c733ed4SBarry Smith x[5 + idt] -= v[5] * xv; 512c733ed4SBarry Smith x[6 + idt] -= v[6] * xv; 522c733ed4SBarry Smith x[7 + idt] -= v[7] * xv; 532c733ed4SBarry Smith x[8 + idt] -= v[8] * xv; 542c733ed4SBarry Smith x[9 + idt] -= v[9] * xv; 552c733ed4SBarry Smith x[10 + idt] -= v[10] * xv; 562c733ed4SBarry Smith x[11 + idt] -= v[11] * xv; 572c733ed4SBarry Smith x[12 + idt] -= v[12] * xv; 582c733ed4SBarry Smith x[13 + idt] -= v[13] * xv; 592c733ed4SBarry Smith v += 14; 602c733ed4SBarry Smith } 612c733ed4SBarry Smith } 622c733ed4SBarry Smith } 632c733ed4SBarry Smith /* backward solve the upper triangular */ 642c733ed4SBarry Smith for (i = n - 1; i >= 0; i--) { 652c733ed4SBarry Smith v = aa + bs2 * (adiag[i + 1] + 1); 662c733ed4SBarry Smith vi = aj + adiag[i + 1] + 1; 672c733ed4SBarry Smith nz = adiag[i] - adiag[i + 1] - 1; 682c733ed4SBarry Smith idt = bs * i; 699371c9d4SSatish Balay s[0] = x[idt]; 709371c9d4SSatish Balay s[1] = x[1 + idt]; 719371c9d4SSatish Balay s[2] = x[2 + idt]; 729371c9d4SSatish Balay s[3] = x[3 + idt]; 739371c9d4SSatish Balay s[4] = x[4 + idt]; 749371c9d4SSatish Balay s[5] = x[5 + idt]; 759371c9d4SSatish Balay s[6] = x[6 + idt]; 769371c9d4SSatish Balay s[7] = x[7 + idt]; 779371c9d4SSatish Balay s[8] = x[8 + idt]; 789371c9d4SSatish Balay s[9] = x[9 + idt]; 799371c9d4SSatish Balay s[10] = x[10 + idt]; 809371c9d4SSatish Balay s[11] = x[11 + idt]; 819371c9d4SSatish Balay s[12] = x[12 + idt]; 829371c9d4SSatish Balay s[13] = x[13 + idt]; 832c733ed4SBarry Smith 842c733ed4SBarry Smith for (m = 0; m < nz; m++) { 852c733ed4SBarry Smith idx = bs * vi[m]; 862c733ed4SBarry Smith for (k = 0; k < bs; k++) { 872c733ed4SBarry Smith xv = x[k + idx]; 882c733ed4SBarry Smith s[0] -= v[0] * xv; 892c733ed4SBarry Smith s[1] -= v[1] * xv; 902c733ed4SBarry Smith s[2] -= v[2] * xv; 912c733ed4SBarry Smith s[3] -= v[3] * xv; 922c733ed4SBarry Smith s[4] -= v[4] * xv; 932c733ed4SBarry Smith s[5] -= v[5] * xv; 942c733ed4SBarry Smith s[6] -= v[6] * xv; 952c733ed4SBarry Smith s[7] -= v[7] * xv; 962c733ed4SBarry Smith s[8] -= v[8] * xv; 972c733ed4SBarry Smith s[9] -= v[9] * xv; 982c733ed4SBarry Smith s[10] -= v[10] * xv; 992c733ed4SBarry Smith s[11] -= v[11] * xv; 1002c733ed4SBarry Smith s[12] -= v[12] * xv; 1012c733ed4SBarry Smith s[13] -= v[13] * xv; 1022c733ed4SBarry Smith v += 14; 1032c733ed4SBarry Smith } 1042c733ed4SBarry Smith } 1059566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(x + idt, bs)); 1062c733ed4SBarry Smith for (k = 0; k < bs; k++) { 1072c733ed4SBarry Smith x[idt] += v[0] * s[k]; 1082c733ed4SBarry Smith x[1 + idt] += v[1] * s[k]; 1092c733ed4SBarry Smith x[2 + idt] += v[2] * s[k]; 1102c733ed4SBarry Smith x[3 + idt] += v[3] * s[k]; 1112c733ed4SBarry Smith x[4 + idt] += v[4] * s[k]; 1122c733ed4SBarry Smith x[5 + idt] += v[5] * s[k]; 1132c733ed4SBarry Smith x[6 + idt] += v[6] * s[k]; 1142c733ed4SBarry Smith x[7 + idt] += v[7] * s[k]; 1152c733ed4SBarry Smith x[8 + idt] += v[8] * s[k]; 1162c733ed4SBarry Smith x[9 + idt] += v[9] * s[k]; 1172c733ed4SBarry Smith x[10 + idt] += v[10] * s[k]; 1182c733ed4SBarry Smith x[11 + idt] += v[11] * s[k]; 1192c733ed4SBarry Smith x[12 + idt] += v[12] * s[k]; 1202c733ed4SBarry Smith x[13 + idt] += v[13] * s[k]; 1212c733ed4SBarry Smith v += 14; 1222c733ed4SBarry Smith } 1232c733ed4SBarry Smith } 1249566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 1259566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 1269566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n)); 1273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1282c733ed4SBarry Smith } 1292c733ed4SBarry Smith 130*15229ffcSPierre Jolivet /* Block operations are done by accessing one column at a time */ 1312c733ed4SBarry Smith /* Default MatSolve for block size 13 */ 1322c733ed4SBarry Smith 133d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_13_NaturalOrdering(Mat A, Vec bb, Vec xx) 134d71ae5a4SJacob Faibussowitsch { 1352c733ed4SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1362c733ed4SBarry Smith const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2; 1372c733ed4SBarry Smith PetscInt i, k, nz, idx, idt, m; 1382c733ed4SBarry Smith const MatScalar *aa = a->a, *v; 1392c733ed4SBarry Smith PetscScalar s[13]; 1402c733ed4SBarry Smith PetscScalar *x, xv; 1412c733ed4SBarry Smith const PetscScalar *b; 1422c733ed4SBarry Smith 1432c733ed4SBarry Smith PetscFunctionBegin; 1449566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 1459566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 1462c733ed4SBarry Smith 1472c733ed4SBarry Smith /* forward solve the lower triangular */ 1482c733ed4SBarry Smith for (i = 0; i < n; i++) { 1492c733ed4SBarry Smith v = aa + bs2 * ai[i]; 1502c733ed4SBarry Smith vi = aj + ai[i]; 1512c733ed4SBarry Smith nz = ai[i + 1] - ai[i]; 1522c733ed4SBarry Smith idt = bs * i; 1539371c9d4SSatish Balay x[idt] = b[idt]; 1549371c9d4SSatish Balay x[1 + idt] = b[1 + idt]; 1559371c9d4SSatish Balay x[2 + idt] = b[2 + idt]; 1569371c9d4SSatish Balay x[3 + idt] = b[3 + idt]; 1579371c9d4SSatish Balay x[4 + idt] = b[4 + idt]; 1589371c9d4SSatish Balay x[5 + idt] = b[5 + idt]; 1599371c9d4SSatish Balay x[6 + idt] = b[6 + idt]; 1609371c9d4SSatish Balay x[7 + idt] = b[7 + idt]; 1619371c9d4SSatish Balay x[8 + idt] = b[8 + idt]; 1629371c9d4SSatish Balay x[9 + idt] = b[9 + idt]; 1639371c9d4SSatish Balay x[10 + idt] = b[10 + idt]; 1649371c9d4SSatish Balay x[11 + idt] = b[11 + idt]; 1659371c9d4SSatish Balay x[12 + idt] = b[12 + idt]; 1662c733ed4SBarry Smith for (m = 0; m < nz; m++) { 1672c733ed4SBarry Smith idx = bs * vi[m]; 1682c733ed4SBarry Smith for (k = 0; k < bs; k++) { 1692c733ed4SBarry Smith xv = x[k + idx]; 1702c733ed4SBarry Smith x[idt] -= v[0] * xv; 1712c733ed4SBarry Smith x[1 + idt] -= v[1] * xv; 1722c733ed4SBarry Smith x[2 + idt] -= v[2] * xv; 1732c733ed4SBarry Smith x[3 + idt] -= v[3] * xv; 1742c733ed4SBarry Smith x[4 + idt] -= v[4] * xv; 1752c733ed4SBarry Smith x[5 + idt] -= v[5] * xv; 1762c733ed4SBarry Smith x[6 + idt] -= v[6] * xv; 1772c733ed4SBarry Smith x[7 + idt] -= v[7] * xv; 1782c733ed4SBarry Smith x[8 + idt] -= v[8] * xv; 1792c733ed4SBarry Smith x[9 + idt] -= v[9] * xv; 1802c733ed4SBarry Smith x[10 + idt] -= v[10] * xv; 1812c733ed4SBarry Smith x[11 + idt] -= v[11] * xv; 1822c733ed4SBarry Smith x[12 + idt] -= v[12] * xv; 1832c733ed4SBarry Smith v += 13; 1842c733ed4SBarry Smith } 1852c733ed4SBarry Smith } 1862c733ed4SBarry Smith } 1872c733ed4SBarry Smith /* backward solve the upper triangular */ 1882c733ed4SBarry Smith for (i = n - 1; i >= 0; i--) { 1892c733ed4SBarry Smith v = aa + bs2 * (adiag[i + 1] + 1); 1902c733ed4SBarry Smith vi = aj + adiag[i + 1] + 1; 1912c733ed4SBarry Smith nz = adiag[i] - adiag[i + 1] - 1; 1922c733ed4SBarry Smith idt = bs * i; 1939371c9d4SSatish Balay s[0] = x[idt]; 1949371c9d4SSatish Balay s[1] = x[1 + idt]; 1959371c9d4SSatish Balay s[2] = x[2 + idt]; 1969371c9d4SSatish Balay s[3] = x[3 + idt]; 1979371c9d4SSatish Balay s[4] = x[4 + idt]; 1989371c9d4SSatish Balay s[5] = x[5 + idt]; 1999371c9d4SSatish Balay s[6] = x[6 + idt]; 2009371c9d4SSatish Balay s[7] = x[7 + idt]; 2019371c9d4SSatish Balay s[8] = x[8 + idt]; 2029371c9d4SSatish Balay s[9] = x[9 + idt]; 2039371c9d4SSatish Balay s[10] = x[10 + idt]; 2049371c9d4SSatish Balay s[11] = x[11 + idt]; 2059371c9d4SSatish Balay s[12] = x[12 + idt]; 2062c733ed4SBarry Smith 2072c733ed4SBarry Smith for (m = 0; m < nz; m++) { 2082c733ed4SBarry Smith idx = bs * vi[m]; 2092c733ed4SBarry Smith for (k = 0; k < bs; k++) { 2102c733ed4SBarry Smith xv = x[k + idx]; 2112c733ed4SBarry Smith s[0] -= v[0] * xv; 2122c733ed4SBarry Smith s[1] -= v[1] * xv; 2132c733ed4SBarry Smith s[2] -= v[2] * xv; 2142c733ed4SBarry Smith s[3] -= v[3] * xv; 2152c733ed4SBarry Smith s[4] -= v[4] * xv; 2162c733ed4SBarry Smith s[5] -= v[5] * xv; 2172c733ed4SBarry Smith s[6] -= v[6] * xv; 2182c733ed4SBarry Smith s[7] -= v[7] * xv; 2192c733ed4SBarry Smith s[8] -= v[8] * xv; 2202c733ed4SBarry Smith s[9] -= v[9] * xv; 2212c733ed4SBarry Smith s[10] -= v[10] * xv; 2222c733ed4SBarry Smith s[11] -= v[11] * xv; 2232c733ed4SBarry Smith s[12] -= v[12] * xv; 2242c733ed4SBarry Smith v += 13; 2252c733ed4SBarry Smith } 2262c733ed4SBarry Smith } 2279566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(x + idt, bs)); 2282c733ed4SBarry Smith for (k = 0; k < bs; k++) { 2292c733ed4SBarry Smith x[idt] += v[0] * s[k]; 2302c733ed4SBarry Smith x[1 + idt] += v[1] * s[k]; 2312c733ed4SBarry Smith x[2 + idt] += v[2] * s[k]; 2322c733ed4SBarry Smith x[3 + idt] += v[3] * s[k]; 2332c733ed4SBarry Smith x[4 + idt] += v[4] * s[k]; 2342c733ed4SBarry Smith x[5 + idt] += v[5] * s[k]; 2352c733ed4SBarry Smith x[6 + idt] += v[6] * s[k]; 2362c733ed4SBarry Smith x[7 + idt] += v[7] * s[k]; 2372c733ed4SBarry Smith x[8 + idt] += v[8] * s[k]; 2382c733ed4SBarry Smith x[9 + idt] += v[9] * s[k]; 2392c733ed4SBarry Smith x[10 + idt] += v[10] * s[k]; 2402c733ed4SBarry Smith x[11 + idt] += v[11] * s[k]; 2412c733ed4SBarry Smith x[12 + idt] += v[12] * s[k]; 2422c733ed4SBarry Smith v += 13; 2432c733ed4SBarry Smith } 2442c733ed4SBarry Smith } 2459566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 2469566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 2479566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n)); 2483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2492c733ed4SBarry Smith } 2502c733ed4SBarry Smith 251*15229ffcSPierre Jolivet /* Block operations are done by accessing one column at a time */ 2522c733ed4SBarry Smith /* Default MatSolve for block size 12 */ 2532c733ed4SBarry Smith 254d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_12_NaturalOrdering(Mat A, Vec bb, Vec xx) 255d71ae5a4SJacob Faibussowitsch { 2562c733ed4SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2572c733ed4SBarry Smith const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2; 2582c733ed4SBarry Smith PetscInt i, k, nz, idx, idt, m; 2592c733ed4SBarry Smith const MatScalar *aa = a->a, *v; 2602c733ed4SBarry Smith PetscScalar s[12]; 2612c733ed4SBarry Smith PetscScalar *x, xv; 2622c733ed4SBarry Smith const PetscScalar *b; 2632c733ed4SBarry Smith 2642c733ed4SBarry Smith PetscFunctionBegin; 2659566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 2669566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 2672c733ed4SBarry Smith 2682c733ed4SBarry Smith /* forward solve the lower triangular */ 2692c733ed4SBarry Smith for (i = 0; i < n; i++) { 2702c733ed4SBarry Smith v = aa + bs2 * ai[i]; 2712c733ed4SBarry Smith vi = aj + ai[i]; 2722c733ed4SBarry Smith nz = ai[i + 1] - ai[i]; 2732c733ed4SBarry Smith idt = bs * i; 2749371c9d4SSatish Balay x[idt] = b[idt]; 2759371c9d4SSatish Balay x[1 + idt] = b[1 + idt]; 2769371c9d4SSatish Balay x[2 + idt] = b[2 + idt]; 2779371c9d4SSatish Balay x[3 + idt] = b[3 + idt]; 2789371c9d4SSatish Balay x[4 + idt] = b[4 + idt]; 2799371c9d4SSatish Balay x[5 + idt] = b[5 + idt]; 2809371c9d4SSatish Balay x[6 + idt] = b[6 + idt]; 2819371c9d4SSatish Balay x[7 + idt] = b[7 + idt]; 2829371c9d4SSatish Balay x[8 + idt] = b[8 + idt]; 2839371c9d4SSatish Balay x[9 + idt] = b[9 + idt]; 2849371c9d4SSatish Balay x[10 + idt] = b[10 + idt]; 2859371c9d4SSatish Balay x[11 + idt] = b[11 + idt]; 2862c733ed4SBarry Smith for (m = 0; m < nz; m++) { 2872c733ed4SBarry Smith idx = bs * vi[m]; 2882c733ed4SBarry Smith for (k = 0; k < bs; k++) { 2892c733ed4SBarry Smith xv = x[k + idx]; 2902c733ed4SBarry Smith x[idt] -= v[0] * xv; 2912c733ed4SBarry Smith x[1 + idt] -= v[1] * xv; 2922c733ed4SBarry Smith x[2 + idt] -= v[2] * xv; 2932c733ed4SBarry Smith x[3 + idt] -= v[3] * xv; 2942c733ed4SBarry Smith x[4 + idt] -= v[4] * xv; 2952c733ed4SBarry Smith x[5 + idt] -= v[5] * xv; 2962c733ed4SBarry Smith x[6 + idt] -= v[6] * xv; 2972c733ed4SBarry Smith x[7 + idt] -= v[7] * xv; 2982c733ed4SBarry Smith x[8 + idt] -= v[8] * xv; 2992c733ed4SBarry Smith x[9 + idt] -= v[9] * xv; 3002c733ed4SBarry Smith x[10 + idt] -= v[10] * xv; 3012c733ed4SBarry Smith x[11 + idt] -= v[11] * xv; 3022c733ed4SBarry Smith v += 12; 3032c733ed4SBarry Smith } 3042c733ed4SBarry Smith } 3052c733ed4SBarry Smith } 3062c733ed4SBarry Smith /* backward solve the upper triangular */ 3072c733ed4SBarry Smith for (i = n - 1; i >= 0; i--) { 3082c733ed4SBarry Smith v = aa + bs2 * (adiag[i + 1] + 1); 3092c733ed4SBarry Smith vi = aj + adiag[i + 1] + 1; 3102c733ed4SBarry Smith nz = adiag[i] - adiag[i + 1] - 1; 3112c733ed4SBarry Smith idt = bs * i; 3129371c9d4SSatish Balay s[0] = x[idt]; 3139371c9d4SSatish Balay s[1] = x[1 + idt]; 3149371c9d4SSatish Balay s[2] = x[2 + idt]; 3159371c9d4SSatish Balay s[3] = x[3 + idt]; 3169371c9d4SSatish Balay s[4] = x[4 + idt]; 3179371c9d4SSatish Balay s[5] = x[5 + idt]; 3189371c9d4SSatish Balay s[6] = x[6 + idt]; 3199371c9d4SSatish Balay s[7] = x[7 + idt]; 3209371c9d4SSatish Balay s[8] = x[8 + idt]; 3219371c9d4SSatish Balay s[9] = x[9 + idt]; 3229371c9d4SSatish Balay s[10] = x[10 + idt]; 3239371c9d4SSatish Balay s[11] = x[11 + idt]; 3242c733ed4SBarry Smith 3252c733ed4SBarry Smith for (m = 0; m < nz; m++) { 3262c733ed4SBarry Smith idx = bs * vi[m]; 3272c733ed4SBarry Smith for (k = 0; k < bs; k++) { 3282c733ed4SBarry Smith xv = x[k + idx]; 3292c733ed4SBarry Smith s[0] -= v[0] * xv; 3302c733ed4SBarry Smith s[1] -= v[1] * xv; 3312c733ed4SBarry Smith s[2] -= v[2] * xv; 3322c733ed4SBarry Smith s[3] -= v[3] * xv; 3332c733ed4SBarry Smith s[4] -= v[4] * xv; 3342c733ed4SBarry Smith s[5] -= v[5] * xv; 3352c733ed4SBarry Smith s[6] -= v[6] * xv; 3362c733ed4SBarry Smith s[7] -= v[7] * xv; 3372c733ed4SBarry Smith s[8] -= v[8] * xv; 3382c733ed4SBarry Smith s[9] -= v[9] * xv; 3392c733ed4SBarry Smith s[10] -= v[10] * xv; 3402c733ed4SBarry Smith s[11] -= v[11] * xv; 3412c733ed4SBarry Smith v += 12; 3422c733ed4SBarry Smith } 3432c733ed4SBarry Smith } 3449566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(x + idt, bs)); 3452c733ed4SBarry Smith for (k = 0; k < bs; k++) { 3462c733ed4SBarry Smith x[idt] += v[0] * s[k]; 3472c733ed4SBarry Smith x[1 + idt] += v[1] * s[k]; 3482c733ed4SBarry Smith x[2 + idt] += v[2] * s[k]; 3492c733ed4SBarry Smith x[3 + idt] += v[3] * s[k]; 3502c733ed4SBarry Smith x[4 + idt] += v[4] * s[k]; 3512c733ed4SBarry Smith x[5 + idt] += v[5] * s[k]; 3522c733ed4SBarry Smith x[6 + idt] += v[6] * s[k]; 3532c733ed4SBarry Smith x[7 + idt] += v[7] * s[k]; 3542c733ed4SBarry Smith x[8 + idt] += v[8] * s[k]; 3552c733ed4SBarry Smith x[9 + idt] += v[9] * s[k]; 3562c733ed4SBarry Smith x[10 + idt] += v[10] * s[k]; 3572c733ed4SBarry Smith x[11 + idt] += v[11] * s[k]; 3582c733ed4SBarry Smith v += 12; 3592c733ed4SBarry Smith } 3602c733ed4SBarry Smith } 3619566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 3629566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 3639566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n)); 3643ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3652c733ed4SBarry Smith } 366