128e30874SSatish Balay #include <../src/mat/impls/baij/seq/baij.h> 2af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h> 328e30874SSatish Balay 4d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_N_inplace(Mat A, Vec bb, Vec xx) 5d71ae5a4SJacob Faibussowitsch { 628e30874SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 728e30874SSatish Balay IS iscol = a->col, isrow = a->row; 828e30874SSatish Balay const PetscInt *r, *c, *rout, *cout; 928e30874SSatish Balay const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *vi; 1028e30874SSatish Balay PetscInt i, nz; 1128e30874SSatish Balay const PetscInt bs = A->rmap->bs, bs2 = a->bs2; 1228e30874SSatish Balay const MatScalar *aa = a->a, *v; 1328e30874SSatish Balay PetscScalar *x, *s, *t, *ls; 1428e30874SSatish Balay const PetscScalar *b; 1528e30874SSatish Balay 1628e30874SSatish Balay PetscFunctionBegin; 17*ce6b3536SJed Brown PetscCheck(bs > 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected bs %" PetscInt_FMT " > 0", bs); 189566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 199566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 2028e30874SSatish Balay t = a->solve_work; 2128e30874SSatish Balay 229371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 239371c9d4SSatish Balay r = rout; 249371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 259371c9d4SSatish Balay c = cout + (n - 1); 2628e30874SSatish Balay 2728e30874SSatish Balay /* forward solve the lower triangular */ 289566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(t, b + bs * (*r++), bs)); 2928e30874SSatish Balay for (i = 1; i < n; i++) { 3028e30874SSatish Balay v = aa + bs2 * ai[i]; 3128e30874SSatish Balay vi = aj + ai[i]; 3228e30874SSatish Balay nz = a->diag[i] - ai[i]; 3328e30874SSatish Balay s = t + bs * i; 349566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(s, b + bs * (*r++), bs)); 3528e30874SSatish Balay while (nz--) { 3628e30874SSatish Balay PetscKernel_v_gets_v_minus_A_times_w(bs, s, v, t + bs * (*vi++)); 3728e30874SSatish Balay v += bs2; 3828e30874SSatish Balay } 3928e30874SSatish Balay } 4028e30874SSatish Balay /* backward solve the upper triangular */ 4128e30874SSatish Balay ls = a->solve_work + A->cmap->n; 4228e30874SSatish Balay for (i = n - 1; i >= 0; i--) { 4328e30874SSatish Balay v = aa + bs2 * (a->diag[i] + 1); 4428e30874SSatish Balay vi = aj + a->diag[i] + 1; 4528e30874SSatish Balay nz = ai[i + 1] - a->diag[i] - 1; 469566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(ls, t + i * bs, bs)); 4728e30874SSatish Balay while (nz--) { 4828e30874SSatish Balay PetscKernel_v_gets_v_minus_A_times_w(bs, ls, v, t + bs * (*vi++)); 4928e30874SSatish Balay v += bs2; 5028e30874SSatish Balay } 5128e30874SSatish Balay PetscKernel_w_gets_A_times_v(bs, ls, aa + bs2 * a->diag[i], t + i * bs); 529566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(x + bs * (*c--), t + i * bs, bs)); 5328e30874SSatish Balay } 5428e30874SSatish Balay 559566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 569566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 579566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 589566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 599566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * (a->bs2) * (a->nz) - A->rmap->bs * A->cmap->n)); 603ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6128e30874SSatish Balay } 6228e30874SSatish Balay 63d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_7_inplace(Mat A, Vec bb, Vec xx) 64d71ae5a4SJacob Faibussowitsch { 6528e30874SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 6628e30874SSatish Balay IS iscol = a->col, isrow = a->row; 6728e30874SSatish Balay const PetscInt *r, *c, *ai = a->i, *aj = a->j; 6828e30874SSatish Balay const PetscInt *rout, *cout, *diag = a->diag, *vi, n = a->mbs; 6928e30874SSatish Balay PetscInt i, nz, idx, idt, idc; 7028e30874SSatish Balay const MatScalar *aa = a->a, *v; 7128e30874SSatish Balay PetscScalar s1, s2, s3, s4, s5, s6, s7, x1, x2, x3, x4, x5, x6, x7, *x, *t; 7228e30874SSatish Balay const PetscScalar *b; 7328e30874SSatish Balay 7428e30874SSatish Balay PetscFunctionBegin; 759566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 769566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 7728e30874SSatish Balay t = a->solve_work; 7828e30874SSatish Balay 799371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 809371c9d4SSatish Balay r = rout; 819371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 829371c9d4SSatish Balay c = cout + (n - 1); 8328e30874SSatish Balay 8428e30874SSatish Balay /* forward solve the lower triangular */ 8528e30874SSatish Balay idx = 7 * (*r++); 869371c9d4SSatish Balay t[0] = b[idx]; 879371c9d4SSatish Balay t[1] = b[1 + idx]; 889371c9d4SSatish Balay t[2] = b[2 + idx]; 899371c9d4SSatish Balay t[3] = b[3 + idx]; 909371c9d4SSatish Balay t[4] = b[4 + idx]; 919371c9d4SSatish Balay t[5] = b[5 + idx]; 929371c9d4SSatish Balay t[6] = b[6 + idx]; 9328e30874SSatish Balay 9428e30874SSatish Balay for (i = 1; i < n; i++) { 9528e30874SSatish Balay v = aa + 49 * ai[i]; 9628e30874SSatish Balay vi = aj + ai[i]; 9728e30874SSatish Balay nz = diag[i] - ai[i]; 9828e30874SSatish Balay idx = 7 * (*r++); 999371c9d4SSatish Balay s1 = b[idx]; 1009371c9d4SSatish Balay s2 = b[1 + idx]; 1019371c9d4SSatish Balay s3 = b[2 + idx]; 1029371c9d4SSatish Balay s4 = b[3 + idx]; 1039371c9d4SSatish Balay s5 = b[4 + idx]; 1049371c9d4SSatish Balay s6 = b[5 + idx]; 1059371c9d4SSatish Balay s7 = b[6 + idx]; 10628e30874SSatish Balay while (nz--) { 10728e30874SSatish Balay idx = 7 * (*vi++); 1089371c9d4SSatish Balay x1 = t[idx]; 1099371c9d4SSatish Balay x2 = t[1 + idx]; 1109371c9d4SSatish Balay x3 = t[2 + idx]; 1119371c9d4SSatish Balay x4 = t[3 + idx]; 1129371c9d4SSatish Balay x5 = t[4 + idx]; 1139371c9d4SSatish Balay x6 = t[5 + idx]; 1149371c9d4SSatish Balay x7 = t[6 + idx]; 11528e30874SSatish Balay s1 -= v[0] * x1 + v[7] * x2 + v[14] * x3 + v[21] * x4 + v[28] * x5 + v[35] * x6 + v[42] * x7; 11628e30874SSatish Balay s2 -= v[1] * x1 + v[8] * x2 + v[15] * x3 + v[22] * x4 + v[29] * x5 + v[36] * x6 + v[43] * x7; 11728e30874SSatish Balay s3 -= v[2] * x1 + v[9] * x2 + v[16] * x3 + v[23] * x4 + v[30] * x5 + v[37] * x6 + v[44] * x7; 11828e30874SSatish Balay s4 -= v[3] * x1 + v[10] * x2 + v[17] * x3 + v[24] * x4 + v[31] * x5 + v[38] * x6 + v[45] * x7; 11928e30874SSatish Balay s5 -= v[4] * x1 + v[11] * x2 + v[18] * x3 + v[25] * x4 + v[32] * x5 + v[39] * x6 + v[46] * x7; 12028e30874SSatish Balay s6 -= v[5] * x1 + v[12] * x2 + v[19] * x3 + v[26] * x4 + v[33] * x5 + v[40] * x6 + v[47] * x7; 12128e30874SSatish Balay s7 -= v[6] * x1 + v[13] * x2 + v[20] * x3 + v[27] * x4 + v[34] * x5 + v[41] * x6 + v[48] * x7; 12228e30874SSatish Balay v += 49; 12328e30874SSatish Balay } 12428e30874SSatish Balay idx = 7 * i; 1259371c9d4SSatish Balay t[idx] = s1; 1269371c9d4SSatish Balay t[1 + idx] = s2; 1279371c9d4SSatish Balay t[2 + idx] = s3; 1289371c9d4SSatish Balay t[3 + idx] = s4; 1299371c9d4SSatish Balay t[4 + idx] = s5; 1309371c9d4SSatish Balay t[5 + idx] = s6; 1319371c9d4SSatish Balay t[6 + idx] = s7; 13228e30874SSatish Balay } 13328e30874SSatish Balay /* backward solve the upper triangular */ 13428e30874SSatish Balay for (i = n - 1; i >= 0; i--) { 13528e30874SSatish Balay v = aa + 49 * diag[i] + 49; 13628e30874SSatish Balay vi = aj + diag[i] + 1; 13728e30874SSatish Balay nz = ai[i + 1] - diag[i] - 1; 13828e30874SSatish Balay idt = 7 * i; 1399371c9d4SSatish Balay s1 = t[idt]; 1409371c9d4SSatish Balay s2 = t[1 + idt]; 1419371c9d4SSatish Balay s3 = t[2 + idt]; 1429371c9d4SSatish Balay s4 = t[3 + idt]; 1439371c9d4SSatish Balay s5 = t[4 + idt]; 1449371c9d4SSatish Balay s6 = t[5 + idt]; 1459371c9d4SSatish Balay s7 = t[6 + idt]; 14628e30874SSatish Balay while (nz--) { 14728e30874SSatish Balay idx = 7 * (*vi++); 1489371c9d4SSatish Balay x1 = t[idx]; 1499371c9d4SSatish Balay x2 = t[1 + idx]; 1509371c9d4SSatish Balay x3 = t[2 + idx]; 1519371c9d4SSatish Balay x4 = t[3 + idx]; 1529371c9d4SSatish Balay x5 = t[4 + idx]; 1539371c9d4SSatish Balay x6 = t[5 + idx]; 1549371c9d4SSatish Balay x7 = t[6 + idx]; 15528e30874SSatish Balay s1 -= v[0] * x1 + v[7] * x2 + v[14] * x3 + v[21] * x4 + v[28] * x5 + v[35] * x6 + v[42] * x7; 15628e30874SSatish Balay s2 -= v[1] * x1 + v[8] * x2 + v[15] * x3 + v[22] * x4 + v[29] * x5 + v[36] * x6 + v[43] * x7; 15728e30874SSatish Balay s3 -= v[2] * x1 + v[9] * x2 + v[16] * x3 + v[23] * x4 + v[30] * x5 + v[37] * x6 + v[44] * x7; 15828e30874SSatish Balay s4 -= v[3] * x1 + v[10] * x2 + v[17] * x3 + v[24] * x4 + v[31] * x5 + v[38] * x6 + v[45] * x7; 15928e30874SSatish Balay s5 -= v[4] * x1 + v[11] * x2 + v[18] * x3 + v[25] * x4 + v[32] * x5 + v[39] * x6 + v[46] * x7; 16028e30874SSatish Balay s6 -= v[5] * x1 + v[12] * x2 + v[19] * x3 + v[26] * x4 + v[33] * x5 + v[40] * x6 + v[47] * x7; 16128e30874SSatish Balay s7 -= v[6] * x1 + v[13] * x2 + v[20] * x3 + v[27] * x4 + v[34] * x5 + v[41] * x6 + v[48] * x7; 16228e30874SSatish Balay v += 49; 16328e30874SSatish Balay } 16428e30874SSatish Balay idc = 7 * (*c--); 16528e30874SSatish Balay v = aa + 49 * diag[i]; 1669371c9d4SSatish Balay x[idc] = t[idt] = v[0] * s1 + v[7] * s2 + v[14] * s3 + v[21] * s4 + v[28] * s5 + v[35] * s6 + v[42] * s7; 1679371c9d4SSatish Balay x[1 + idc] = t[1 + idt] = v[1] * s1 + v[8] * s2 + v[15] * s3 + v[22] * s4 + v[29] * s5 + v[36] * s6 + v[43] * s7; 1689371c9d4SSatish Balay x[2 + idc] = t[2 + idt] = v[2] * s1 + v[9] * s2 + v[16] * s3 + v[23] * s4 + v[30] * s5 + v[37] * s6 + v[44] * s7; 1699371c9d4SSatish Balay x[3 + idc] = t[3 + idt] = v[3] * s1 + v[10] * s2 + v[17] * s3 + v[24] * s4 + v[31] * s5 + v[38] * s6 + v[45] * s7; 1709371c9d4SSatish Balay x[4 + idc] = t[4 + idt] = v[4] * s1 + v[11] * s2 + v[18] * s3 + v[25] * s4 + v[32] * s5 + v[39] * s6 + v[46] * s7; 1719371c9d4SSatish Balay x[5 + idc] = t[5 + idt] = v[5] * s1 + v[12] * s2 + v[19] * s3 + v[26] * s4 + v[33] * s5 + v[40] * s6 + v[47] * s7; 1729371c9d4SSatish Balay x[6 + idc] = t[6 + idt] = v[6] * s1 + v[13] * s2 + v[20] * s3 + v[27] * s4 + v[34] * s5 + v[41] * s6 + v[48] * s7; 17328e30874SSatish Balay } 17428e30874SSatish Balay 1759566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 1769566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 1779566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 1789566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 1799566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * 49 * (a->nz) - 7.0 * A->cmap->n)); 1803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 18128e30874SSatish Balay } 18228e30874SSatish Balay 183d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_7(Mat A, Vec bb, Vec xx) 184d71ae5a4SJacob Faibussowitsch { 18528e30874SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 18628e30874SSatish Balay IS iscol = a->col, isrow = a->row; 18728e30874SSatish Balay const PetscInt *r, *c, *ai = a->i, *aj = a->j, *adiag = a->diag; 18828e30874SSatish Balay const PetscInt n = a->mbs, *rout, *cout, *vi; 18928e30874SSatish Balay PetscInt i, nz, idx, idt, idc, m; 19028e30874SSatish Balay const MatScalar *aa = a->a, *v; 19128e30874SSatish Balay PetscScalar s1, s2, s3, s4, s5, s6, s7, x1, x2, x3, x4, x5, x6, x7, *x, *t; 19228e30874SSatish Balay const PetscScalar *b; 19328e30874SSatish Balay 19428e30874SSatish Balay PetscFunctionBegin; 1959566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 1969566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 19728e30874SSatish Balay t = a->solve_work; 19828e30874SSatish Balay 1999371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 2009371c9d4SSatish Balay r = rout; 2019371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 2029371c9d4SSatish Balay c = cout; 20328e30874SSatish Balay 20428e30874SSatish Balay /* forward solve the lower triangular */ 20528e30874SSatish Balay idx = 7 * r[0]; 2069371c9d4SSatish Balay t[0] = b[idx]; 2079371c9d4SSatish Balay t[1] = b[1 + idx]; 2089371c9d4SSatish Balay t[2] = b[2 + idx]; 2099371c9d4SSatish Balay t[3] = b[3 + idx]; 2109371c9d4SSatish Balay t[4] = b[4 + idx]; 2119371c9d4SSatish Balay t[5] = b[5 + idx]; 2129371c9d4SSatish Balay t[6] = b[6 + idx]; 21328e30874SSatish Balay 21428e30874SSatish Balay for (i = 1; i < n; i++) { 21528e30874SSatish Balay v = aa + 49 * ai[i]; 21628e30874SSatish Balay vi = aj + ai[i]; 21728e30874SSatish Balay nz = ai[i + 1] - ai[i]; 21828e30874SSatish Balay idx = 7 * r[i]; 2199371c9d4SSatish Balay s1 = b[idx]; 2209371c9d4SSatish Balay s2 = b[1 + idx]; 2219371c9d4SSatish Balay s3 = b[2 + idx]; 2229371c9d4SSatish Balay s4 = b[3 + idx]; 2239371c9d4SSatish Balay s5 = b[4 + idx]; 2249371c9d4SSatish Balay s6 = b[5 + idx]; 2259371c9d4SSatish Balay s7 = b[6 + idx]; 22628e30874SSatish Balay for (m = 0; m < nz; m++) { 22728e30874SSatish Balay idx = 7 * vi[m]; 2289371c9d4SSatish Balay x1 = t[idx]; 2299371c9d4SSatish Balay x2 = t[1 + idx]; 2309371c9d4SSatish Balay x3 = t[2 + idx]; 2319371c9d4SSatish Balay x4 = t[3 + idx]; 2329371c9d4SSatish Balay x5 = t[4 + idx]; 2339371c9d4SSatish Balay x6 = t[5 + idx]; 2349371c9d4SSatish Balay x7 = t[6 + idx]; 23528e30874SSatish Balay s1 -= v[0] * x1 + v[7] * x2 + v[14] * x3 + v[21] * x4 + v[28] * x5 + v[35] * x6 + v[42] * x7; 23628e30874SSatish Balay s2 -= v[1] * x1 + v[8] * x2 + v[15] * x3 + v[22] * x4 + v[29] * x5 + v[36] * x6 + v[43] * x7; 23728e30874SSatish Balay s3 -= v[2] * x1 + v[9] * x2 + v[16] * x3 + v[23] * x4 + v[30] * x5 + v[37] * x6 + v[44] * x7; 23828e30874SSatish Balay s4 -= v[3] * x1 + v[10] * x2 + v[17] * x3 + v[24] * x4 + v[31] * x5 + v[38] * x6 + v[45] * x7; 23928e30874SSatish Balay s5 -= v[4] * x1 + v[11] * x2 + v[18] * x3 + v[25] * x4 + v[32] * x5 + v[39] * x6 + v[46] * x7; 24028e30874SSatish Balay s6 -= v[5] * x1 + v[12] * x2 + v[19] * x3 + v[26] * x4 + v[33] * x5 + v[40] * x6 + v[47] * x7; 24128e30874SSatish Balay s7 -= v[6] * x1 + v[13] * x2 + v[20] * x3 + v[27] * x4 + v[34] * x5 + v[41] * x6 + v[48] * x7; 24228e30874SSatish Balay v += 49; 24328e30874SSatish Balay } 24428e30874SSatish Balay idx = 7 * i; 2459371c9d4SSatish Balay t[idx] = s1; 2469371c9d4SSatish Balay t[1 + idx] = s2; 2479371c9d4SSatish Balay t[2 + idx] = s3; 2489371c9d4SSatish Balay t[3 + idx] = s4; 2499371c9d4SSatish Balay t[4 + idx] = s5; 2509371c9d4SSatish Balay t[5 + idx] = s6; 2519371c9d4SSatish Balay t[6 + idx] = s7; 25228e30874SSatish Balay } 25328e30874SSatish Balay /* backward solve the upper triangular */ 25428e30874SSatish Balay for (i = n - 1; i >= 0; i--) { 25528e30874SSatish Balay v = aa + 49 * (adiag[i + 1] + 1); 25628e30874SSatish Balay vi = aj + adiag[i + 1] + 1; 25728e30874SSatish Balay nz = adiag[i] - adiag[i + 1] - 1; 25828e30874SSatish Balay idt = 7 * i; 2599371c9d4SSatish Balay s1 = t[idt]; 2609371c9d4SSatish Balay s2 = t[1 + idt]; 2619371c9d4SSatish Balay s3 = t[2 + idt]; 2629371c9d4SSatish Balay s4 = t[3 + idt]; 2639371c9d4SSatish Balay s5 = t[4 + idt]; 2649371c9d4SSatish Balay s6 = t[5 + idt]; 2659371c9d4SSatish Balay s7 = t[6 + idt]; 26628e30874SSatish Balay for (m = 0; m < nz; m++) { 26728e30874SSatish Balay idx = 7 * vi[m]; 2689371c9d4SSatish Balay x1 = t[idx]; 2699371c9d4SSatish Balay x2 = t[1 + idx]; 2709371c9d4SSatish Balay x3 = t[2 + idx]; 2719371c9d4SSatish Balay x4 = t[3 + idx]; 2729371c9d4SSatish Balay x5 = t[4 + idx]; 2739371c9d4SSatish Balay x6 = t[5 + idx]; 2749371c9d4SSatish Balay x7 = t[6 + idx]; 27528e30874SSatish Balay s1 -= v[0] * x1 + v[7] * x2 + v[14] * x3 + v[21] * x4 + v[28] * x5 + v[35] * x6 + v[42] * x7; 27628e30874SSatish Balay s2 -= v[1] * x1 + v[8] * x2 + v[15] * x3 + v[22] * x4 + v[29] * x5 + v[36] * x6 + v[43] * x7; 27728e30874SSatish Balay s3 -= v[2] * x1 + v[9] * x2 + v[16] * x3 + v[23] * x4 + v[30] * x5 + v[37] * x6 + v[44] * x7; 27828e30874SSatish Balay s4 -= v[3] * x1 + v[10] * x2 + v[17] * x3 + v[24] * x4 + v[31] * x5 + v[38] * x6 + v[45] * x7; 27928e30874SSatish Balay s5 -= v[4] * x1 + v[11] * x2 + v[18] * x3 + v[25] * x4 + v[32] * x5 + v[39] * x6 + v[46] * x7; 28028e30874SSatish Balay s6 -= v[5] * x1 + v[12] * x2 + v[19] * x3 + v[26] * x4 + v[33] * x5 + v[40] * x6 + v[47] * x7; 28128e30874SSatish Balay s7 -= v[6] * x1 + v[13] * x2 + v[20] * x3 + v[27] * x4 + v[34] * x5 + v[41] * x6 + v[48] * x7; 28228e30874SSatish Balay v += 49; 28328e30874SSatish Balay } 28428e30874SSatish Balay idc = 7 * c[i]; 2859371c9d4SSatish Balay x[idc] = t[idt] = v[0] * s1 + v[7] * s2 + v[14] * s3 + v[21] * s4 + v[28] * s5 + v[35] * s6 + v[42] * s7; 2869371c9d4SSatish Balay x[1 + idc] = t[1 + idt] = v[1] * s1 + v[8] * s2 + v[15] * s3 + v[22] * s4 + v[29] * s5 + v[36] * s6 + v[43] * s7; 2879371c9d4SSatish Balay x[2 + idc] = t[2 + idt] = v[2] * s1 + v[9] * s2 + v[16] * s3 + v[23] * s4 + v[30] * s5 + v[37] * s6 + v[44] * s7; 2889371c9d4SSatish Balay x[3 + idc] = t[3 + idt] = v[3] * s1 + v[10] * s2 + v[17] * s3 + v[24] * s4 + v[31] * s5 + v[38] * s6 + v[45] * s7; 2899371c9d4SSatish Balay x[4 + idc] = t[4 + idt] = v[4] * s1 + v[11] * s2 + v[18] * s3 + v[25] * s4 + v[32] * s5 + v[39] * s6 + v[46] * s7; 2909371c9d4SSatish Balay x[5 + idc] = t[5 + idt] = v[5] * s1 + v[12] * s2 + v[19] * s3 + v[26] * s4 + v[33] * s5 + v[40] * s6 + v[47] * s7; 2919371c9d4SSatish Balay x[6 + idc] = t[6 + idt] = v[6] * s1 + v[13] * s2 + v[20] * s3 + v[27] * s4 + v[34] * s5 + v[41] * s6 + v[48] * s7; 29228e30874SSatish Balay } 29328e30874SSatish Balay 2949566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 2959566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 2969566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 2979566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 2989566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * 49 * (a->nz) - 7.0 * A->cmap->n)); 2993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 30028e30874SSatish Balay } 30128e30874SSatish Balay 302d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_6_inplace(Mat A, Vec bb, Vec xx) 303d71ae5a4SJacob Faibussowitsch { 30428e30874SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 30528e30874SSatish Balay IS iscol = a->col, isrow = a->row; 30628e30874SSatish Balay const PetscInt *r, *c, *rout, *cout; 30728e30874SSatish Balay const PetscInt *diag = a->diag, n = a->mbs, *vi, *ai = a->i, *aj = a->j; 30828e30874SSatish Balay PetscInt i, nz, idx, idt, idc; 30928e30874SSatish Balay const MatScalar *aa = a->a, *v; 31028e30874SSatish Balay PetscScalar *x, s1, s2, s3, s4, s5, s6, x1, x2, x3, x4, x5, x6, *t; 31128e30874SSatish Balay const PetscScalar *b; 31228e30874SSatish Balay 31328e30874SSatish Balay PetscFunctionBegin; 3149566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 3159566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 31628e30874SSatish Balay t = a->solve_work; 31728e30874SSatish Balay 3189371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 3199371c9d4SSatish Balay r = rout; 3209371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 3219371c9d4SSatish Balay c = cout + (n - 1); 32228e30874SSatish Balay 32328e30874SSatish Balay /* forward solve the lower triangular */ 32428e30874SSatish Balay idx = 6 * (*r++); 3259371c9d4SSatish Balay t[0] = b[idx]; 3269371c9d4SSatish Balay t[1] = b[1 + idx]; 3279371c9d4SSatish Balay t[2] = b[2 + idx]; 3289371c9d4SSatish Balay t[3] = b[3 + idx]; 3299371c9d4SSatish Balay t[4] = b[4 + idx]; 3309371c9d4SSatish Balay t[5] = b[5 + idx]; 33128e30874SSatish Balay for (i = 1; i < n; i++) { 33228e30874SSatish Balay v = aa + 36 * ai[i]; 33328e30874SSatish Balay vi = aj + ai[i]; 33428e30874SSatish Balay nz = diag[i] - ai[i]; 33528e30874SSatish Balay idx = 6 * (*r++); 3369371c9d4SSatish Balay s1 = b[idx]; 3379371c9d4SSatish Balay s2 = b[1 + idx]; 3389371c9d4SSatish Balay s3 = b[2 + idx]; 3399371c9d4SSatish Balay s4 = b[3 + idx]; 3409371c9d4SSatish Balay s5 = b[4 + idx]; 3419371c9d4SSatish Balay s6 = b[5 + idx]; 34228e30874SSatish Balay while (nz--) { 34328e30874SSatish Balay idx = 6 * (*vi++); 3449371c9d4SSatish Balay x1 = t[idx]; 3459371c9d4SSatish Balay x2 = t[1 + idx]; 3469371c9d4SSatish Balay x3 = t[2 + idx]; 3479371c9d4SSatish Balay x4 = t[3 + idx]; 3489371c9d4SSatish Balay x5 = t[4 + idx]; 3499371c9d4SSatish Balay x6 = t[5 + idx]; 35028e30874SSatish Balay s1 -= v[0] * x1 + v[6] * x2 + v[12] * x3 + v[18] * x4 + v[24] * x5 + v[30] * x6; 35128e30874SSatish Balay s2 -= v[1] * x1 + v[7] * x2 + v[13] * x3 + v[19] * x4 + v[25] * x5 + v[31] * x6; 35228e30874SSatish Balay s3 -= v[2] * x1 + v[8] * x2 + v[14] * x3 + v[20] * x4 + v[26] * x5 + v[32] * x6; 35328e30874SSatish Balay s4 -= v[3] * x1 + v[9] * x2 + v[15] * x3 + v[21] * x4 + v[27] * x5 + v[33] * x6; 35428e30874SSatish Balay s5 -= v[4] * x1 + v[10] * x2 + v[16] * x3 + v[22] * x4 + v[28] * x5 + v[34] * x6; 35528e30874SSatish Balay s6 -= v[5] * x1 + v[11] * x2 + v[17] * x3 + v[23] * x4 + v[29] * x5 + v[35] * x6; 35628e30874SSatish Balay v += 36; 35728e30874SSatish Balay } 35828e30874SSatish Balay idx = 6 * i; 3599371c9d4SSatish Balay t[idx] = s1; 3609371c9d4SSatish Balay t[1 + idx] = s2; 3619371c9d4SSatish Balay t[2 + idx] = s3; 3629371c9d4SSatish Balay t[3 + idx] = s4; 3639371c9d4SSatish Balay t[4 + idx] = s5; 3649371c9d4SSatish Balay t[5 + idx] = s6; 36528e30874SSatish Balay } 36628e30874SSatish Balay /* backward solve the upper triangular */ 36728e30874SSatish Balay for (i = n - 1; i >= 0; i--) { 36828e30874SSatish Balay v = aa + 36 * diag[i] + 36; 36928e30874SSatish Balay vi = aj + diag[i] + 1; 37028e30874SSatish Balay nz = ai[i + 1] - diag[i] - 1; 37128e30874SSatish Balay idt = 6 * i; 3729371c9d4SSatish Balay s1 = t[idt]; 3739371c9d4SSatish Balay s2 = t[1 + idt]; 3749371c9d4SSatish Balay s3 = t[2 + idt]; 3759371c9d4SSatish Balay s4 = t[3 + idt]; 3769371c9d4SSatish Balay s5 = t[4 + idt]; 3779371c9d4SSatish Balay s6 = t[5 + idt]; 37828e30874SSatish Balay while (nz--) { 37928e30874SSatish Balay idx = 6 * (*vi++); 3809371c9d4SSatish Balay x1 = t[idx]; 3819371c9d4SSatish Balay x2 = t[1 + idx]; 3829371c9d4SSatish Balay x3 = t[2 + idx]; 3839371c9d4SSatish Balay x4 = t[3 + idx]; 3849371c9d4SSatish Balay x5 = t[4 + idx]; 3859371c9d4SSatish Balay x6 = t[5 + idx]; 38628e30874SSatish Balay s1 -= v[0] * x1 + v[6] * x2 + v[12] * x3 + v[18] * x4 + v[24] * x5 + v[30] * x6; 38728e30874SSatish Balay s2 -= v[1] * x1 + v[7] * x2 + v[13] * x3 + v[19] * x4 + v[25] * x5 + v[31] * x6; 38828e30874SSatish Balay s3 -= v[2] * x1 + v[8] * x2 + v[14] * x3 + v[20] * x4 + v[26] * x5 + v[32] * x6; 38928e30874SSatish Balay s4 -= v[3] * x1 + v[9] * x2 + v[15] * x3 + v[21] * x4 + v[27] * x5 + v[33] * x6; 39028e30874SSatish Balay s5 -= v[4] * x1 + v[10] * x2 + v[16] * x3 + v[22] * x4 + v[28] * x5 + v[34] * x6; 39128e30874SSatish Balay s6 -= v[5] * x1 + v[11] * x2 + v[17] * x3 + v[23] * x4 + v[29] * x5 + v[35] * x6; 39228e30874SSatish Balay v += 36; 39328e30874SSatish Balay } 39428e30874SSatish Balay idc = 6 * (*c--); 39528e30874SSatish Balay v = aa + 36 * diag[i]; 3969371c9d4SSatish Balay x[idc] = t[idt] = v[0] * s1 + v[6] * s2 + v[12] * s3 + v[18] * s4 + v[24] * s5 + v[30] * s6; 3979371c9d4SSatish Balay x[1 + idc] = t[1 + idt] = v[1] * s1 + v[7] * s2 + v[13] * s3 + v[19] * s4 + v[25] * s5 + v[31] * s6; 3989371c9d4SSatish Balay x[2 + idc] = t[2 + idt] = v[2] * s1 + v[8] * s2 + v[14] * s3 + v[20] * s4 + v[26] * s5 + v[32] * s6; 3999371c9d4SSatish Balay x[3 + idc] = t[3 + idt] = v[3] * s1 + v[9] * s2 + v[15] * s3 + v[21] * s4 + v[27] * s5 + v[33] * s6; 4009371c9d4SSatish Balay x[4 + idc] = t[4 + idt] = v[4] * s1 + v[10] * s2 + v[16] * s3 + v[22] * s4 + v[28] * s5 + v[34] * s6; 4019371c9d4SSatish Balay x[5 + idc] = t[5 + idt] = v[5] * s1 + v[11] * s2 + v[17] * s3 + v[23] * s4 + v[29] * s5 + v[35] * s6; 40228e30874SSatish Balay } 40328e30874SSatish Balay 4049566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 4059566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 4069566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 4079566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 4089566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * 36 * (a->nz) - 6.0 * A->cmap->n)); 4093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 41028e30874SSatish Balay } 41128e30874SSatish Balay 412d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_6(Mat A, Vec bb, Vec xx) 413d71ae5a4SJacob Faibussowitsch { 41428e30874SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 41528e30874SSatish Balay IS iscol = a->col, isrow = a->row; 41628e30874SSatish Balay const PetscInt *r, *c, *rout, *cout; 41728e30874SSatish Balay const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 41828e30874SSatish Balay PetscInt i, nz, idx, idt, idc, m; 41928e30874SSatish Balay const MatScalar *aa = a->a, *v; 42028e30874SSatish Balay PetscScalar *x, s1, s2, s3, s4, s5, s6, x1, x2, x3, x4, x5, x6, *t; 42128e30874SSatish Balay const PetscScalar *b; 42228e30874SSatish Balay 42328e30874SSatish Balay PetscFunctionBegin; 4249566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 4259566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 42628e30874SSatish Balay t = a->solve_work; 42728e30874SSatish Balay 4289371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 4299371c9d4SSatish Balay r = rout; 4309371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 4319371c9d4SSatish Balay c = cout; 43228e30874SSatish Balay 43328e30874SSatish Balay /* forward solve the lower triangular */ 43428e30874SSatish Balay idx = 6 * r[0]; 4359371c9d4SSatish Balay t[0] = b[idx]; 4369371c9d4SSatish Balay t[1] = b[1 + idx]; 4379371c9d4SSatish Balay t[2] = b[2 + idx]; 4389371c9d4SSatish Balay t[3] = b[3 + idx]; 4399371c9d4SSatish Balay t[4] = b[4 + idx]; 4409371c9d4SSatish Balay t[5] = b[5 + idx]; 44128e30874SSatish Balay for (i = 1; i < n; i++) { 44228e30874SSatish Balay v = aa + 36 * ai[i]; 44328e30874SSatish Balay vi = aj + ai[i]; 44428e30874SSatish Balay nz = ai[i + 1] - ai[i]; 44528e30874SSatish Balay idx = 6 * r[i]; 4469371c9d4SSatish Balay s1 = b[idx]; 4479371c9d4SSatish Balay s2 = b[1 + idx]; 4489371c9d4SSatish Balay s3 = b[2 + idx]; 4499371c9d4SSatish Balay s4 = b[3 + idx]; 4509371c9d4SSatish Balay s5 = b[4 + idx]; 4519371c9d4SSatish Balay s6 = b[5 + idx]; 45228e30874SSatish Balay for (m = 0; m < nz; m++) { 45328e30874SSatish Balay idx = 6 * vi[m]; 4549371c9d4SSatish Balay x1 = t[idx]; 4559371c9d4SSatish Balay x2 = t[1 + idx]; 4569371c9d4SSatish Balay x3 = t[2 + idx]; 4579371c9d4SSatish Balay x4 = t[3 + idx]; 4589371c9d4SSatish Balay x5 = t[4 + idx]; 4599371c9d4SSatish Balay x6 = t[5 + idx]; 46028e30874SSatish Balay s1 -= v[0] * x1 + v[6] * x2 + v[12] * x3 + v[18] * x4 + v[24] * x5 + v[30] * x6; 46128e30874SSatish Balay s2 -= v[1] * x1 + v[7] * x2 + v[13] * x3 + v[19] * x4 + v[25] * x5 + v[31] * x6; 46228e30874SSatish Balay s3 -= v[2] * x1 + v[8] * x2 + v[14] * x3 + v[20] * x4 + v[26] * x5 + v[32] * x6; 46328e30874SSatish Balay s4 -= v[3] * x1 + v[9] * x2 + v[15] * x3 + v[21] * x4 + v[27] * x5 + v[33] * x6; 46428e30874SSatish Balay s5 -= v[4] * x1 + v[10] * x2 + v[16] * x3 + v[22] * x4 + v[28] * x5 + v[34] * x6; 46528e30874SSatish Balay s6 -= v[5] * x1 + v[11] * x2 + v[17] * x3 + v[23] * x4 + v[29] * x5 + v[35] * x6; 46628e30874SSatish Balay v += 36; 46728e30874SSatish Balay } 46828e30874SSatish Balay idx = 6 * i; 4699371c9d4SSatish Balay t[idx] = s1; 4709371c9d4SSatish Balay t[1 + idx] = s2; 4719371c9d4SSatish Balay t[2 + idx] = s3; 4729371c9d4SSatish Balay t[3 + idx] = s4; 4739371c9d4SSatish Balay t[4 + idx] = s5; 4749371c9d4SSatish Balay t[5 + idx] = s6; 47528e30874SSatish Balay } 47628e30874SSatish Balay /* backward solve the upper triangular */ 47728e30874SSatish Balay for (i = n - 1; i >= 0; i--) { 47828e30874SSatish Balay v = aa + 36 * (adiag[i + 1] + 1); 47928e30874SSatish Balay vi = aj + adiag[i + 1] + 1; 48028e30874SSatish Balay nz = adiag[i] - adiag[i + 1] - 1; 48128e30874SSatish Balay idt = 6 * i; 4829371c9d4SSatish Balay s1 = t[idt]; 4839371c9d4SSatish Balay s2 = t[1 + idt]; 4849371c9d4SSatish Balay s3 = t[2 + idt]; 4859371c9d4SSatish Balay s4 = t[3 + idt]; 4869371c9d4SSatish Balay s5 = t[4 + idt]; 4879371c9d4SSatish Balay s6 = t[5 + idt]; 48828e30874SSatish Balay for (m = 0; m < nz; m++) { 48928e30874SSatish Balay idx = 6 * vi[m]; 4909371c9d4SSatish Balay x1 = t[idx]; 4919371c9d4SSatish Balay x2 = t[1 + idx]; 4929371c9d4SSatish Balay x3 = t[2 + idx]; 4939371c9d4SSatish Balay x4 = t[3 + idx]; 4949371c9d4SSatish Balay x5 = t[4 + idx]; 4959371c9d4SSatish Balay x6 = t[5 + idx]; 49628e30874SSatish Balay s1 -= v[0] * x1 + v[6] * x2 + v[12] * x3 + v[18] * x4 + v[24] * x5 + v[30] * x6; 49728e30874SSatish Balay s2 -= v[1] * x1 + v[7] * x2 + v[13] * x3 + v[19] * x4 + v[25] * x5 + v[31] * x6; 49828e30874SSatish Balay s3 -= v[2] * x1 + v[8] * x2 + v[14] * x3 + v[20] * x4 + v[26] * x5 + v[32] * x6; 49928e30874SSatish Balay s4 -= v[3] * x1 + v[9] * x2 + v[15] * x3 + v[21] * x4 + v[27] * x5 + v[33] * x6; 50028e30874SSatish Balay s5 -= v[4] * x1 + v[10] * x2 + v[16] * x3 + v[22] * x4 + v[28] * x5 + v[34] * x6; 50128e30874SSatish Balay s6 -= v[5] * x1 + v[11] * x2 + v[17] * x3 + v[23] * x4 + v[29] * x5 + v[35] * x6; 50228e30874SSatish Balay v += 36; 50328e30874SSatish Balay } 50428e30874SSatish Balay idc = 6 * c[i]; 5059371c9d4SSatish Balay x[idc] = t[idt] = v[0] * s1 + v[6] * s2 + v[12] * s3 + v[18] * s4 + v[24] * s5 + v[30] * s6; 5069371c9d4SSatish Balay x[1 + idc] = t[1 + idt] = v[1] * s1 + v[7] * s2 + v[13] * s3 + v[19] * s4 + v[25] * s5 + v[31] * s6; 5079371c9d4SSatish Balay x[2 + idc] = t[2 + idt] = v[2] * s1 + v[8] * s2 + v[14] * s3 + v[20] * s4 + v[26] * s5 + v[32] * s6; 5089371c9d4SSatish Balay x[3 + idc] = t[3 + idt] = v[3] * s1 + v[9] * s2 + v[15] * s3 + v[21] * s4 + v[27] * s5 + v[33] * s6; 5099371c9d4SSatish Balay x[4 + idc] = t[4 + idt] = v[4] * s1 + v[10] * s2 + v[16] * s3 + v[22] * s4 + v[28] * s5 + v[34] * s6; 5109371c9d4SSatish Balay x[5 + idc] = t[5 + idt] = v[5] * s1 + v[11] * s2 + v[17] * s3 + v[23] * s4 + v[29] * s5 + v[35] * s6; 51128e30874SSatish Balay } 51228e30874SSatish Balay 5139566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 5149566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 5159566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 5169566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 5179566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * 36 * (a->nz) - 6.0 * A->cmap->n)); 5183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 51928e30874SSatish Balay } 52028e30874SSatish Balay 521d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_5_inplace(Mat A, Vec bb, Vec xx) 522d71ae5a4SJacob Faibussowitsch { 52328e30874SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 52428e30874SSatish Balay IS iscol = a->col, isrow = a->row; 52528e30874SSatish Balay const PetscInt *r, *c, *rout, *cout, *diag = a->diag; 52628e30874SSatish Balay const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j; 52728e30874SSatish Balay PetscInt i, nz, idx, idt, idc; 52828e30874SSatish Balay const MatScalar *aa = a->a, *v; 52928e30874SSatish Balay PetscScalar *x, s1, s2, s3, s4, s5, x1, x2, x3, x4, x5, *t; 53028e30874SSatish Balay const PetscScalar *b; 53128e30874SSatish Balay 53228e30874SSatish Balay PetscFunctionBegin; 5339566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 5349566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 53528e30874SSatish Balay t = a->solve_work; 53628e30874SSatish Balay 5379371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 5389371c9d4SSatish Balay r = rout; 5399371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 5409371c9d4SSatish Balay c = cout + (n - 1); 54128e30874SSatish Balay 54228e30874SSatish Balay /* forward solve the lower triangular */ 54328e30874SSatish Balay idx = 5 * (*r++); 5449371c9d4SSatish Balay t[0] = b[idx]; 5459371c9d4SSatish Balay t[1] = b[1 + idx]; 5469371c9d4SSatish Balay t[2] = b[2 + idx]; 5479371c9d4SSatish Balay t[3] = b[3 + idx]; 5489371c9d4SSatish Balay t[4] = b[4 + idx]; 54928e30874SSatish Balay for (i = 1; i < n; i++) { 55028e30874SSatish Balay v = aa + 25 * ai[i]; 55128e30874SSatish Balay vi = aj + ai[i]; 55228e30874SSatish Balay nz = diag[i] - ai[i]; 55328e30874SSatish Balay idx = 5 * (*r++); 5549371c9d4SSatish Balay s1 = b[idx]; 5559371c9d4SSatish Balay s2 = b[1 + idx]; 5569371c9d4SSatish Balay s3 = b[2 + idx]; 5579371c9d4SSatish Balay s4 = b[3 + idx]; 55828e30874SSatish Balay s5 = b[4 + idx]; 55928e30874SSatish Balay while (nz--) { 56028e30874SSatish Balay idx = 5 * (*vi++); 5619371c9d4SSatish Balay x1 = t[idx]; 5629371c9d4SSatish Balay x2 = t[1 + idx]; 5639371c9d4SSatish Balay x3 = t[2 + idx]; 5649371c9d4SSatish Balay x4 = t[3 + idx]; 5659371c9d4SSatish Balay x5 = t[4 + idx]; 56628e30874SSatish Balay s1 -= v[0] * x1 + v[5] * x2 + v[10] * x3 + v[15] * x4 + v[20] * x5; 56728e30874SSatish Balay s2 -= v[1] * x1 + v[6] * x2 + v[11] * x3 + v[16] * x4 + v[21] * x5; 56828e30874SSatish Balay s3 -= v[2] * x1 + v[7] * x2 + v[12] * x3 + v[17] * x4 + v[22] * x5; 56928e30874SSatish Balay s4 -= v[3] * x1 + v[8] * x2 + v[13] * x3 + v[18] * x4 + v[23] * x5; 57028e30874SSatish Balay s5 -= v[4] * x1 + v[9] * x2 + v[14] * x3 + v[19] * x4 + v[24] * x5; 57128e30874SSatish Balay v += 25; 57228e30874SSatish Balay } 57328e30874SSatish Balay idx = 5 * i; 5749371c9d4SSatish Balay t[idx] = s1; 5759371c9d4SSatish Balay t[1 + idx] = s2; 5769371c9d4SSatish Balay t[2 + idx] = s3; 5779371c9d4SSatish Balay t[3 + idx] = s4; 5789371c9d4SSatish Balay t[4 + idx] = s5; 57928e30874SSatish Balay } 58028e30874SSatish Balay /* backward solve the upper triangular */ 58128e30874SSatish Balay for (i = n - 1; i >= 0; i--) { 58228e30874SSatish Balay v = aa + 25 * diag[i] + 25; 58328e30874SSatish Balay vi = aj + diag[i] + 1; 58428e30874SSatish Balay nz = ai[i + 1] - diag[i] - 1; 58528e30874SSatish Balay idt = 5 * i; 5869371c9d4SSatish Balay s1 = t[idt]; 5879371c9d4SSatish Balay s2 = t[1 + idt]; 5889371c9d4SSatish Balay s3 = t[2 + idt]; 5899371c9d4SSatish Balay s4 = t[3 + idt]; 5909371c9d4SSatish Balay s5 = t[4 + idt]; 59128e30874SSatish Balay while (nz--) { 59228e30874SSatish Balay idx = 5 * (*vi++); 5939371c9d4SSatish Balay x1 = t[idx]; 5949371c9d4SSatish Balay x2 = t[1 + idx]; 5959371c9d4SSatish Balay x3 = t[2 + idx]; 5969371c9d4SSatish Balay x4 = t[3 + idx]; 5979371c9d4SSatish Balay x5 = t[4 + idx]; 59828e30874SSatish Balay s1 -= v[0] * x1 + v[5] * x2 + v[10] * x3 + v[15] * x4 + v[20] * x5; 59928e30874SSatish Balay s2 -= v[1] * x1 + v[6] * x2 + v[11] * x3 + v[16] * x4 + v[21] * x5; 60028e30874SSatish Balay s3 -= v[2] * x1 + v[7] * x2 + v[12] * x3 + v[17] * x4 + v[22] * x5; 60128e30874SSatish Balay s4 -= v[3] * x1 + v[8] * x2 + v[13] * x3 + v[18] * x4 + v[23] * x5; 60228e30874SSatish Balay s5 -= v[4] * x1 + v[9] * x2 + v[14] * x3 + v[19] * x4 + v[24] * x5; 60328e30874SSatish Balay v += 25; 60428e30874SSatish Balay } 60528e30874SSatish Balay idc = 5 * (*c--); 60628e30874SSatish Balay v = aa + 25 * diag[i]; 6079371c9d4SSatish Balay x[idc] = t[idt] = v[0] * s1 + v[5] * s2 + v[10] * s3 + v[15] * s4 + v[20] * s5; 6089371c9d4SSatish Balay x[1 + idc] = t[1 + idt] = v[1] * s1 + v[6] * s2 + v[11] * s3 + v[16] * s4 + v[21] * s5; 6099371c9d4SSatish Balay x[2 + idc] = t[2 + idt] = v[2] * s1 + v[7] * s2 + v[12] * s3 + v[17] * s4 + v[22] * s5; 6109371c9d4SSatish Balay x[3 + idc] = t[3 + idt] = v[3] * s1 + v[8] * s2 + v[13] * s3 + v[18] * s4 + v[23] * s5; 6119371c9d4SSatish Balay x[4 + idc] = t[4 + idt] = v[4] * s1 + v[9] * s2 + v[14] * s3 + v[19] * s4 + v[24] * s5; 61228e30874SSatish Balay } 61328e30874SSatish Balay 6149566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 6159566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 6169566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 6179566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 6189566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * 25 * (a->nz) - 5.0 * A->cmap->n)); 6193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 62028e30874SSatish Balay } 62128e30874SSatish Balay 622d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_5(Mat A, Vec bb, Vec xx) 623d71ae5a4SJacob Faibussowitsch { 62428e30874SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 62528e30874SSatish Balay IS iscol = a->col, isrow = a->row; 62628e30874SSatish Balay const PetscInt *r, *c, *rout, *cout; 62728e30874SSatish Balay const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 62828e30874SSatish Balay PetscInt i, nz, idx, idt, idc, m; 62928e30874SSatish Balay const MatScalar *aa = a->a, *v; 63028e30874SSatish Balay PetscScalar *x, s1, s2, s3, s4, s5, x1, x2, x3, x4, x5, *t; 63128e30874SSatish Balay const PetscScalar *b; 63228e30874SSatish Balay 63328e30874SSatish Balay PetscFunctionBegin; 6349566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 6359566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 63628e30874SSatish Balay t = a->solve_work; 63728e30874SSatish Balay 6389371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 6399371c9d4SSatish Balay r = rout; 6409371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 6419371c9d4SSatish Balay c = cout; 64228e30874SSatish Balay 64328e30874SSatish Balay /* forward solve the lower triangular */ 64428e30874SSatish Balay idx = 5 * r[0]; 6459371c9d4SSatish Balay t[0] = b[idx]; 6469371c9d4SSatish Balay t[1] = b[1 + idx]; 6479371c9d4SSatish Balay t[2] = b[2 + idx]; 6489371c9d4SSatish Balay t[3] = b[3 + idx]; 6499371c9d4SSatish Balay t[4] = b[4 + idx]; 65028e30874SSatish Balay for (i = 1; i < n; i++) { 65128e30874SSatish Balay v = aa + 25 * ai[i]; 65228e30874SSatish Balay vi = aj + ai[i]; 65328e30874SSatish Balay nz = ai[i + 1] - ai[i]; 65428e30874SSatish Balay idx = 5 * r[i]; 6559371c9d4SSatish Balay s1 = b[idx]; 6569371c9d4SSatish Balay s2 = b[1 + idx]; 6579371c9d4SSatish Balay s3 = b[2 + idx]; 6589371c9d4SSatish Balay s4 = b[3 + idx]; 65928e30874SSatish Balay s5 = b[4 + idx]; 66028e30874SSatish Balay for (m = 0; m < nz; m++) { 66128e30874SSatish Balay idx = 5 * vi[m]; 6629371c9d4SSatish Balay x1 = t[idx]; 6639371c9d4SSatish Balay x2 = t[1 + idx]; 6649371c9d4SSatish Balay x3 = t[2 + idx]; 6659371c9d4SSatish Balay x4 = t[3 + idx]; 6669371c9d4SSatish Balay x5 = t[4 + idx]; 66728e30874SSatish Balay s1 -= v[0] * x1 + v[5] * x2 + v[10] * x3 + v[15] * x4 + v[20] * x5; 66828e30874SSatish Balay s2 -= v[1] * x1 + v[6] * x2 + v[11] * x3 + v[16] * x4 + v[21] * x5; 66928e30874SSatish Balay s3 -= v[2] * x1 + v[7] * x2 + v[12] * x3 + v[17] * x4 + v[22] * x5; 67028e30874SSatish Balay s4 -= v[3] * x1 + v[8] * x2 + v[13] * x3 + v[18] * x4 + v[23] * x5; 67128e30874SSatish Balay s5 -= v[4] * x1 + v[9] * x2 + v[14] * x3 + v[19] * x4 + v[24] * x5; 67228e30874SSatish Balay v += 25; 67328e30874SSatish Balay } 67428e30874SSatish Balay idx = 5 * i; 6759371c9d4SSatish Balay t[idx] = s1; 6769371c9d4SSatish Balay t[1 + idx] = s2; 6779371c9d4SSatish Balay t[2 + idx] = s3; 6789371c9d4SSatish Balay t[3 + idx] = s4; 6799371c9d4SSatish Balay t[4 + idx] = s5; 68028e30874SSatish Balay } 68128e30874SSatish Balay /* backward solve the upper triangular */ 68228e30874SSatish Balay for (i = n - 1; i >= 0; i--) { 68328e30874SSatish Balay v = aa + 25 * (adiag[i + 1] + 1); 68428e30874SSatish Balay vi = aj + adiag[i + 1] + 1; 68528e30874SSatish Balay nz = adiag[i] - adiag[i + 1] - 1; 68628e30874SSatish Balay idt = 5 * i; 6879371c9d4SSatish Balay s1 = t[idt]; 6889371c9d4SSatish Balay s2 = t[1 + idt]; 6899371c9d4SSatish Balay s3 = t[2 + idt]; 6909371c9d4SSatish Balay s4 = t[3 + idt]; 6919371c9d4SSatish Balay s5 = t[4 + idt]; 69228e30874SSatish Balay for (m = 0; m < nz; m++) { 69328e30874SSatish Balay idx = 5 * vi[m]; 6949371c9d4SSatish Balay x1 = t[idx]; 6959371c9d4SSatish Balay x2 = t[1 + idx]; 6969371c9d4SSatish Balay x3 = t[2 + idx]; 6979371c9d4SSatish Balay x4 = t[3 + idx]; 6989371c9d4SSatish Balay x5 = t[4 + idx]; 69928e30874SSatish Balay s1 -= v[0] * x1 + v[5] * x2 + v[10] * x3 + v[15] * x4 + v[20] * x5; 70028e30874SSatish Balay s2 -= v[1] * x1 + v[6] * x2 + v[11] * x3 + v[16] * x4 + v[21] * x5; 70128e30874SSatish Balay s3 -= v[2] * x1 + v[7] * x2 + v[12] * x3 + v[17] * x4 + v[22] * x5; 70228e30874SSatish Balay s4 -= v[3] * x1 + v[8] * x2 + v[13] * x3 + v[18] * x4 + v[23] * x5; 70328e30874SSatish Balay s5 -= v[4] * x1 + v[9] * x2 + v[14] * x3 + v[19] * x4 + v[24] * x5; 70428e30874SSatish Balay v += 25; 70528e30874SSatish Balay } 70628e30874SSatish Balay idc = 5 * c[i]; 7079371c9d4SSatish Balay x[idc] = t[idt] = v[0] * s1 + v[5] * s2 + v[10] * s3 + v[15] * s4 + v[20] * s5; 7089371c9d4SSatish Balay x[1 + idc] = t[1 + idt] = v[1] * s1 + v[6] * s2 + v[11] * s3 + v[16] * s4 + v[21] * s5; 7099371c9d4SSatish Balay x[2 + idc] = t[2 + idt] = v[2] * s1 + v[7] * s2 + v[12] * s3 + v[17] * s4 + v[22] * s5; 7109371c9d4SSatish Balay x[3 + idc] = t[3 + idt] = v[3] * s1 + v[8] * s2 + v[13] * s3 + v[18] * s4 + v[23] * s5; 7119371c9d4SSatish Balay x[4 + idc] = t[4 + idt] = v[4] * s1 + v[9] * s2 + v[14] * s3 + v[19] * s4 + v[24] * s5; 71228e30874SSatish Balay } 71328e30874SSatish Balay 7149566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 7159566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 7169566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 7179566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 7189566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * 25 * (a->nz) - 5.0 * A->cmap->n)); 7193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 72028e30874SSatish Balay } 72128e30874SSatish Balay 722d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_4_inplace(Mat A, Vec bb, Vec xx) 723d71ae5a4SJacob Faibussowitsch { 72428e30874SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 72528e30874SSatish Balay IS iscol = a->col, isrow = a->row; 72628e30874SSatish Balay const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j; 72728e30874SSatish Balay PetscInt i, nz, idx, idt, idc; 72828e30874SSatish Balay const PetscInt *r, *c, *diag = a->diag, *rout, *cout; 72928e30874SSatish Balay const MatScalar *aa = a->a, *v; 73028e30874SSatish Balay PetscScalar *x, s1, s2, s3, s4, x1, x2, x3, x4, *t; 73128e30874SSatish Balay const PetscScalar *b; 73228e30874SSatish Balay 73328e30874SSatish Balay PetscFunctionBegin; 7349566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 7359566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 73628e30874SSatish Balay t = a->solve_work; 73728e30874SSatish Balay 7389371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 7399371c9d4SSatish Balay r = rout; 7409371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 7419371c9d4SSatish Balay c = cout + (n - 1); 74228e30874SSatish Balay 74328e30874SSatish Balay /* forward solve the lower triangular */ 74428e30874SSatish Balay idx = 4 * (*r++); 7459371c9d4SSatish Balay t[0] = b[idx]; 7469371c9d4SSatish Balay t[1] = b[1 + idx]; 7479371c9d4SSatish Balay t[2] = b[2 + idx]; 7489371c9d4SSatish Balay t[3] = b[3 + idx]; 74928e30874SSatish Balay for (i = 1; i < n; i++) { 75028e30874SSatish Balay v = aa + 16 * ai[i]; 75128e30874SSatish Balay vi = aj + ai[i]; 75228e30874SSatish Balay nz = diag[i] - ai[i]; 75328e30874SSatish Balay idx = 4 * (*r++); 7549371c9d4SSatish Balay s1 = b[idx]; 7559371c9d4SSatish Balay s2 = b[1 + idx]; 7569371c9d4SSatish Balay s3 = b[2 + idx]; 7579371c9d4SSatish Balay s4 = b[3 + idx]; 75828e30874SSatish Balay while (nz--) { 75928e30874SSatish Balay idx = 4 * (*vi++); 7609371c9d4SSatish Balay x1 = t[idx]; 7619371c9d4SSatish Balay x2 = t[1 + idx]; 7629371c9d4SSatish Balay x3 = t[2 + idx]; 7639371c9d4SSatish Balay x4 = t[3 + idx]; 76428e30874SSatish Balay s1 -= v[0] * x1 + v[4] * x2 + v[8] * x3 + v[12] * x4; 76528e30874SSatish Balay s2 -= v[1] * x1 + v[5] * x2 + v[9] * x3 + v[13] * x4; 76628e30874SSatish Balay s3 -= v[2] * x1 + v[6] * x2 + v[10] * x3 + v[14] * x4; 76728e30874SSatish Balay s4 -= v[3] * x1 + v[7] * x2 + v[11] * x3 + v[15] * x4; 76828e30874SSatish Balay v += 16; 76928e30874SSatish Balay } 77028e30874SSatish Balay idx = 4 * i; 7719371c9d4SSatish Balay t[idx] = s1; 7729371c9d4SSatish Balay t[1 + idx] = s2; 7739371c9d4SSatish Balay t[2 + idx] = s3; 7749371c9d4SSatish Balay t[3 + idx] = s4; 77528e30874SSatish Balay } 77628e30874SSatish Balay /* backward solve the upper triangular */ 77728e30874SSatish Balay for (i = n - 1; i >= 0; i--) { 77828e30874SSatish Balay v = aa + 16 * diag[i] + 16; 77928e30874SSatish Balay vi = aj + diag[i] + 1; 78028e30874SSatish Balay nz = ai[i + 1] - diag[i] - 1; 78128e30874SSatish Balay idt = 4 * i; 7829371c9d4SSatish Balay s1 = t[idt]; 7839371c9d4SSatish Balay s2 = t[1 + idt]; 7849371c9d4SSatish Balay s3 = t[2 + idt]; 7859371c9d4SSatish Balay s4 = t[3 + idt]; 78628e30874SSatish Balay while (nz--) { 78728e30874SSatish Balay idx = 4 * (*vi++); 7889371c9d4SSatish Balay x1 = t[idx]; 7899371c9d4SSatish Balay x2 = t[1 + idx]; 7909371c9d4SSatish Balay x3 = t[2 + idx]; 7919371c9d4SSatish Balay x4 = t[3 + idx]; 79228e30874SSatish Balay s1 -= v[0] * x1 + v[4] * x2 + v[8] * x3 + v[12] * x4; 79328e30874SSatish Balay s2 -= v[1] * x1 + v[5] * x2 + v[9] * x3 + v[13] * x4; 79428e30874SSatish Balay s3 -= v[2] * x1 + v[6] * x2 + v[10] * x3 + v[14] * x4; 79528e30874SSatish Balay s4 -= v[3] * x1 + v[7] * x2 + v[11] * x3 + v[15] * x4; 79628e30874SSatish Balay v += 16; 79728e30874SSatish Balay } 79828e30874SSatish Balay idc = 4 * (*c--); 79928e30874SSatish Balay v = aa + 16 * diag[i]; 80028e30874SSatish Balay x[idc] = t[idt] = v[0] * s1 + v[4] * s2 + v[8] * s3 + v[12] * s4; 80128e30874SSatish Balay x[1 + idc] = t[1 + idt] = v[1] * s1 + v[5] * s2 + v[9] * s3 + v[13] * s4; 80228e30874SSatish Balay x[2 + idc] = t[2 + idt] = v[2] * s1 + v[6] * s2 + v[10] * s3 + v[14] * s4; 80328e30874SSatish Balay x[3 + idc] = t[3 + idt] = v[3] * s1 + v[7] * s2 + v[11] * s3 + v[15] * s4; 80428e30874SSatish Balay } 80528e30874SSatish Balay 8069566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 8079566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 8089566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 8099566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 8109566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * 16 * (a->nz) - 4.0 * A->cmap->n)); 8113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 81228e30874SSatish Balay } 81328e30874SSatish Balay 814d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_4(Mat A, Vec bb, Vec xx) 815d71ae5a4SJacob Faibussowitsch { 81628e30874SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 81728e30874SSatish Balay IS iscol = a->col, isrow = a->row; 81828e30874SSatish Balay const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 81928e30874SSatish Balay PetscInt i, nz, idx, idt, idc, m; 82028e30874SSatish Balay const PetscInt *r, *c, *rout, *cout; 82128e30874SSatish Balay const MatScalar *aa = a->a, *v; 82228e30874SSatish Balay PetscScalar *x, s1, s2, s3, s4, x1, x2, x3, x4, *t; 82328e30874SSatish Balay const PetscScalar *b; 82428e30874SSatish Balay 82528e30874SSatish Balay PetscFunctionBegin; 8269566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 8279566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 82828e30874SSatish Balay t = a->solve_work; 82928e30874SSatish Balay 8309371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 8319371c9d4SSatish Balay r = rout; 8329371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 8339371c9d4SSatish Balay c = cout; 83428e30874SSatish Balay 83528e30874SSatish Balay /* forward solve the lower triangular */ 83628e30874SSatish Balay idx = 4 * r[0]; 8379371c9d4SSatish Balay t[0] = b[idx]; 8389371c9d4SSatish Balay t[1] = b[1 + idx]; 8399371c9d4SSatish Balay t[2] = b[2 + idx]; 8409371c9d4SSatish Balay t[3] = b[3 + idx]; 84128e30874SSatish Balay for (i = 1; i < n; i++) { 84228e30874SSatish Balay v = aa + 16 * ai[i]; 84328e30874SSatish Balay vi = aj + ai[i]; 84428e30874SSatish Balay nz = ai[i + 1] - ai[i]; 84528e30874SSatish Balay idx = 4 * r[i]; 8469371c9d4SSatish Balay s1 = b[idx]; 8479371c9d4SSatish Balay s2 = b[1 + idx]; 8489371c9d4SSatish Balay s3 = b[2 + idx]; 8499371c9d4SSatish Balay s4 = b[3 + idx]; 85028e30874SSatish Balay for (m = 0; m < nz; m++) { 85128e30874SSatish Balay idx = 4 * vi[m]; 8529371c9d4SSatish Balay x1 = t[idx]; 8539371c9d4SSatish Balay x2 = t[1 + idx]; 8549371c9d4SSatish Balay x3 = t[2 + idx]; 8559371c9d4SSatish Balay x4 = t[3 + idx]; 85628e30874SSatish Balay s1 -= v[0] * x1 + v[4] * x2 + v[8] * x3 + v[12] * x4; 85728e30874SSatish Balay s2 -= v[1] * x1 + v[5] * x2 + v[9] * x3 + v[13] * x4; 85828e30874SSatish Balay s3 -= v[2] * x1 + v[6] * x2 + v[10] * x3 + v[14] * x4; 85928e30874SSatish Balay s4 -= v[3] * x1 + v[7] * x2 + v[11] * x3 + v[15] * x4; 86028e30874SSatish Balay v += 16; 86128e30874SSatish Balay } 86228e30874SSatish Balay idx = 4 * i; 8639371c9d4SSatish Balay t[idx] = s1; 8649371c9d4SSatish Balay t[1 + idx] = s2; 8659371c9d4SSatish Balay t[2 + idx] = s3; 8669371c9d4SSatish Balay t[3 + idx] = s4; 86728e30874SSatish Balay } 86828e30874SSatish Balay /* backward solve the upper triangular */ 86928e30874SSatish Balay for (i = n - 1; i >= 0; i--) { 87028e30874SSatish Balay v = aa + 16 * (adiag[i + 1] + 1); 87128e30874SSatish Balay vi = aj + adiag[i + 1] + 1; 87228e30874SSatish Balay nz = adiag[i] - adiag[i + 1] - 1; 87328e30874SSatish Balay idt = 4 * i; 8749371c9d4SSatish Balay s1 = t[idt]; 8759371c9d4SSatish Balay s2 = t[1 + idt]; 8769371c9d4SSatish Balay s3 = t[2 + idt]; 8779371c9d4SSatish Balay s4 = t[3 + idt]; 87828e30874SSatish Balay for (m = 0; m < nz; m++) { 87928e30874SSatish Balay idx = 4 * vi[m]; 8809371c9d4SSatish Balay x1 = t[idx]; 8819371c9d4SSatish Balay x2 = t[1 + idx]; 8829371c9d4SSatish Balay x3 = t[2 + idx]; 8839371c9d4SSatish Balay x4 = t[3 + idx]; 88428e30874SSatish Balay s1 -= v[0] * x1 + v[4] * x2 + v[8] * x3 + v[12] * x4; 88528e30874SSatish Balay s2 -= v[1] * x1 + v[5] * x2 + v[9] * x3 + v[13] * x4; 88628e30874SSatish Balay s3 -= v[2] * x1 + v[6] * x2 + v[10] * x3 + v[14] * x4; 88728e30874SSatish Balay s4 -= v[3] * x1 + v[7] * x2 + v[11] * x3 + v[15] * x4; 88828e30874SSatish Balay v += 16; 88928e30874SSatish Balay } 89028e30874SSatish Balay idc = 4 * c[i]; 89128e30874SSatish Balay x[idc] = t[idt] = v[0] * s1 + v[4] * s2 + v[8] * s3 + v[12] * s4; 89228e30874SSatish Balay x[1 + idc] = t[1 + idt] = v[1] * s1 + v[5] * s2 + v[9] * s3 + v[13] * s4; 89328e30874SSatish Balay x[2 + idc] = t[2 + idt] = v[2] * s1 + v[6] * s2 + v[10] * s3 + v[14] * s4; 89428e30874SSatish Balay x[3 + idc] = t[3 + idt] = v[3] * s1 + v[7] * s2 + v[11] * s3 + v[15] * s4; 89528e30874SSatish Balay } 89628e30874SSatish Balay 8979566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 8989566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 8999566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 9009566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 9019566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * 16 * (a->nz) - 4.0 * A->cmap->n)); 9023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 90328e30874SSatish Balay } 90428e30874SSatish Balay 905d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_3_inplace(Mat A, Vec bb, Vec xx) 906d71ae5a4SJacob Faibussowitsch { 90728e30874SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 90828e30874SSatish Balay IS iscol = a->col, isrow = a->row; 90928e30874SSatish Balay const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j; 91028e30874SSatish Balay PetscInt i, nz, idx, idt, idc; 91128e30874SSatish Balay const PetscInt *r, *c, *diag = a->diag, *rout, *cout; 91228e30874SSatish Balay const MatScalar *aa = a->a, *v; 91328e30874SSatish Balay PetscScalar *x, s1, s2, s3, x1, x2, x3, *t; 91428e30874SSatish Balay const PetscScalar *b; 91528e30874SSatish Balay 91628e30874SSatish Balay PetscFunctionBegin; 9179566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 9189566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 91928e30874SSatish Balay t = a->solve_work; 92028e30874SSatish Balay 9219371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 9229371c9d4SSatish Balay r = rout; 9239371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 9249371c9d4SSatish Balay c = cout + (n - 1); 92528e30874SSatish Balay 92628e30874SSatish Balay /* forward solve the lower triangular */ 92728e30874SSatish Balay idx = 3 * (*r++); 9289371c9d4SSatish Balay t[0] = b[idx]; 9299371c9d4SSatish Balay t[1] = b[1 + idx]; 9309371c9d4SSatish Balay t[2] = b[2 + idx]; 93128e30874SSatish Balay for (i = 1; i < n; i++) { 93228e30874SSatish Balay v = aa + 9 * ai[i]; 93328e30874SSatish Balay vi = aj + ai[i]; 93428e30874SSatish Balay nz = diag[i] - ai[i]; 93528e30874SSatish Balay idx = 3 * (*r++); 9369371c9d4SSatish Balay s1 = b[idx]; 9379371c9d4SSatish Balay s2 = b[1 + idx]; 9389371c9d4SSatish Balay s3 = b[2 + idx]; 93928e30874SSatish Balay while (nz--) { 94028e30874SSatish Balay idx = 3 * (*vi++); 9419371c9d4SSatish Balay x1 = t[idx]; 9429371c9d4SSatish Balay x2 = t[1 + idx]; 9439371c9d4SSatish Balay x3 = t[2 + idx]; 94428e30874SSatish Balay s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3; 94528e30874SSatish Balay s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3; 94628e30874SSatish Balay s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3; 94728e30874SSatish Balay v += 9; 94828e30874SSatish Balay } 94928e30874SSatish Balay idx = 3 * i; 9509371c9d4SSatish Balay t[idx] = s1; 9519371c9d4SSatish Balay t[1 + idx] = s2; 9529371c9d4SSatish Balay t[2 + idx] = s3; 95328e30874SSatish Balay } 95428e30874SSatish Balay /* backward solve the upper triangular */ 95528e30874SSatish Balay for (i = n - 1; i >= 0; i--) { 95628e30874SSatish Balay v = aa + 9 * diag[i] + 9; 95728e30874SSatish Balay vi = aj + diag[i] + 1; 95828e30874SSatish Balay nz = ai[i + 1] - diag[i] - 1; 95928e30874SSatish Balay idt = 3 * i; 9609371c9d4SSatish Balay s1 = t[idt]; 9619371c9d4SSatish Balay s2 = t[1 + idt]; 9629371c9d4SSatish Balay s3 = t[2 + idt]; 96328e30874SSatish Balay while (nz--) { 96428e30874SSatish Balay idx = 3 * (*vi++); 9659371c9d4SSatish Balay x1 = t[idx]; 9669371c9d4SSatish Balay x2 = t[1 + idx]; 9679371c9d4SSatish Balay x3 = t[2 + idx]; 96828e30874SSatish Balay s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3; 96928e30874SSatish Balay s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3; 97028e30874SSatish Balay s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3; 97128e30874SSatish Balay v += 9; 97228e30874SSatish Balay } 97328e30874SSatish Balay idc = 3 * (*c--); 97428e30874SSatish Balay v = aa + 9 * diag[i]; 97528e30874SSatish Balay x[idc] = t[idt] = v[0] * s1 + v[3] * s2 + v[6] * s3; 97628e30874SSatish Balay x[1 + idc] = t[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3; 97728e30874SSatish Balay x[2 + idc] = t[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3; 97828e30874SSatish Balay } 9799566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 9809566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 9819566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 9829566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 9839566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * 9 * (a->nz) - 3.0 * A->cmap->n)); 9843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 98528e30874SSatish Balay } 98628e30874SSatish Balay 987d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_3(Mat A, Vec bb, Vec xx) 988d71ae5a4SJacob Faibussowitsch { 98928e30874SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 99028e30874SSatish Balay IS iscol = a->col, isrow = a->row; 99128e30874SSatish Balay const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 99228e30874SSatish Balay PetscInt i, nz, idx, idt, idc, m; 99328e30874SSatish Balay const PetscInt *r, *c, *rout, *cout; 99428e30874SSatish Balay const MatScalar *aa = a->a, *v; 99528e30874SSatish Balay PetscScalar *x, s1, s2, s3, x1, x2, x3, *t; 99628e30874SSatish Balay const PetscScalar *b; 99728e30874SSatish Balay 99828e30874SSatish Balay PetscFunctionBegin; 9999566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 10009566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 100128e30874SSatish Balay t = a->solve_work; 100228e30874SSatish Balay 10039371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 10049371c9d4SSatish Balay r = rout; 10059371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 10069371c9d4SSatish Balay c = cout; 100728e30874SSatish Balay 100828e30874SSatish Balay /* forward solve the lower triangular */ 100928e30874SSatish Balay idx = 3 * r[0]; 10109371c9d4SSatish Balay t[0] = b[idx]; 10119371c9d4SSatish Balay t[1] = b[1 + idx]; 10129371c9d4SSatish Balay t[2] = b[2 + idx]; 101328e30874SSatish Balay for (i = 1; i < n; i++) { 101428e30874SSatish Balay v = aa + 9 * ai[i]; 101528e30874SSatish Balay vi = aj + ai[i]; 101628e30874SSatish Balay nz = ai[i + 1] - ai[i]; 101728e30874SSatish Balay idx = 3 * r[i]; 10189371c9d4SSatish Balay s1 = b[idx]; 10199371c9d4SSatish Balay s2 = b[1 + idx]; 10209371c9d4SSatish Balay s3 = b[2 + idx]; 102128e30874SSatish Balay for (m = 0; m < nz; m++) { 102228e30874SSatish Balay idx = 3 * vi[m]; 10239371c9d4SSatish Balay x1 = t[idx]; 10249371c9d4SSatish Balay x2 = t[1 + idx]; 10259371c9d4SSatish Balay x3 = t[2 + idx]; 102628e30874SSatish Balay s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3; 102728e30874SSatish Balay s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3; 102828e30874SSatish Balay s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3; 102928e30874SSatish Balay v += 9; 103028e30874SSatish Balay } 103128e30874SSatish Balay idx = 3 * i; 10329371c9d4SSatish Balay t[idx] = s1; 10339371c9d4SSatish Balay t[1 + idx] = s2; 10349371c9d4SSatish Balay t[2 + idx] = s3; 103528e30874SSatish Balay } 103628e30874SSatish Balay /* backward solve the upper triangular */ 103728e30874SSatish Balay for (i = n - 1; i >= 0; i--) { 103828e30874SSatish Balay v = aa + 9 * (adiag[i + 1] + 1); 103928e30874SSatish Balay vi = aj + adiag[i + 1] + 1; 104028e30874SSatish Balay nz = adiag[i] - adiag[i + 1] - 1; 104128e30874SSatish Balay idt = 3 * i; 10429371c9d4SSatish Balay s1 = t[idt]; 10439371c9d4SSatish Balay s2 = t[1 + idt]; 10449371c9d4SSatish Balay s3 = t[2 + idt]; 104528e30874SSatish Balay for (m = 0; m < nz; m++) { 104628e30874SSatish Balay idx = 3 * vi[m]; 10479371c9d4SSatish Balay x1 = t[idx]; 10489371c9d4SSatish Balay x2 = t[1 + idx]; 10499371c9d4SSatish Balay x3 = t[2 + idx]; 105028e30874SSatish Balay s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3; 105128e30874SSatish Balay s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3; 105228e30874SSatish Balay s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3; 105328e30874SSatish Balay v += 9; 105428e30874SSatish Balay } 105528e30874SSatish Balay idc = 3 * c[i]; 105628e30874SSatish Balay x[idc] = t[idt] = v[0] * s1 + v[3] * s2 + v[6] * s3; 105728e30874SSatish Balay x[1 + idc] = t[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3; 105828e30874SSatish Balay x[2 + idc] = t[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3; 105928e30874SSatish Balay } 10609566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 10619566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 10629566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 10639566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 10649566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * 9 * (a->nz) - 3.0 * A->cmap->n)); 10653ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 106628e30874SSatish Balay } 106728e30874SSatish Balay 1068d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_2_inplace(Mat A, Vec bb, Vec xx) 1069d71ae5a4SJacob Faibussowitsch { 107028e30874SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 107128e30874SSatish Balay IS iscol = a->col, isrow = a->row; 107228e30874SSatish Balay const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j; 107328e30874SSatish Balay PetscInt i, nz, idx, idt, idc; 107428e30874SSatish Balay const PetscInt *r, *c, *diag = a->diag, *rout, *cout; 107528e30874SSatish Balay const MatScalar *aa = a->a, *v; 107628e30874SSatish Balay PetscScalar *x, s1, s2, x1, x2, *t; 107728e30874SSatish Balay const PetscScalar *b; 107828e30874SSatish Balay 107928e30874SSatish Balay PetscFunctionBegin; 10809566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 10819566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 108228e30874SSatish Balay t = a->solve_work; 108328e30874SSatish Balay 10849371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 10859371c9d4SSatish Balay r = rout; 10869371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 10879371c9d4SSatish Balay c = cout + (n - 1); 108828e30874SSatish Balay 108928e30874SSatish Balay /* forward solve the lower triangular */ 109028e30874SSatish Balay idx = 2 * (*r++); 10919371c9d4SSatish Balay t[0] = b[idx]; 10929371c9d4SSatish Balay t[1] = b[1 + idx]; 109328e30874SSatish Balay for (i = 1; i < n; i++) { 109428e30874SSatish Balay v = aa + 4 * ai[i]; 109528e30874SSatish Balay vi = aj + ai[i]; 109628e30874SSatish Balay nz = diag[i] - ai[i]; 109728e30874SSatish Balay idx = 2 * (*r++); 10989371c9d4SSatish Balay s1 = b[idx]; 10999371c9d4SSatish Balay s2 = b[1 + idx]; 110028e30874SSatish Balay while (nz--) { 110128e30874SSatish Balay idx = 2 * (*vi++); 11029371c9d4SSatish Balay x1 = t[idx]; 11039371c9d4SSatish Balay x2 = t[1 + idx]; 110428e30874SSatish Balay s1 -= v[0] * x1 + v[2] * x2; 110528e30874SSatish Balay s2 -= v[1] * x1 + v[3] * x2; 110628e30874SSatish Balay v += 4; 110728e30874SSatish Balay } 110828e30874SSatish Balay idx = 2 * i; 11099371c9d4SSatish Balay t[idx] = s1; 11109371c9d4SSatish Balay t[1 + idx] = s2; 111128e30874SSatish Balay } 111228e30874SSatish Balay /* backward solve the upper triangular */ 111328e30874SSatish Balay for (i = n - 1; i >= 0; i--) { 111428e30874SSatish Balay v = aa + 4 * diag[i] + 4; 111528e30874SSatish Balay vi = aj + diag[i] + 1; 111628e30874SSatish Balay nz = ai[i + 1] - diag[i] - 1; 111728e30874SSatish Balay idt = 2 * i; 11189371c9d4SSatish Balay s1 = t[idt]; 11199371c9d4SSatish Balay s2 = t[1 + idt]; 112028e30874SSatish Balay while (nz--) { 112128e30874SSatish Balay idx = 2 * (*vi++); 11229371c9d4SSatish Balay x1 = t[idx]; 11239371c9d4SSatish Balay x2 = t[1 + idx]; 112428e30874SSatish Balay s1 -= v[0] * x1 + v[2] * x2; 112528e30874SSatish Balay s2 -= v[1] * x1 + v[3] * x2; 112628e30874SSatish Balay v += 4; 112728e30874SSatish Balay } 112828e30874SSatish Balay idc = 2 * (*c--); 112928e30874SSatish Balay v = aa + 4 * diag[i]; 113028e30874SSatish Balay x[idc] = t[idt] = v[0] * s1 + v[2] * s2; 113128e30874SSatish Balay x[1 + idc] = t[1 + idt] = v[1] * s1 + v[3] * s2; 113228e30874SSatish Balay } 11339566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 11349566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 11359566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 11369566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 11379566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * 4 * (a->nz) - 2.0 * A->cmap->n)); 11383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 113928e30874SSatish Balay } 114028e30874SSatish Balay 1141d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_2(Mat A, Vec bb, Vec xx) 1142d71ae5a4SJacob Faibussowitsch { 114328e30874SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 114428e30874SSatish Balay IS iscol = a->col, isrow = a->row; 114528e30874SSatish Balay const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 114628e30874SSatish Balay PetscInt i, nz, idx, jdx, idt, idc, m; 114728e30874SSatish Balay const PetscInt *r, *c, *rout, *cout; 114828e30874SSatish Balay const MatScalar *aa = a->a, *v; 114928e30874SSatish Balay PetscScalar *x, s1, s2, x1, x2, *t; 115028e30874SSatish Balay const PetscScalar *b; 115128e30874SSatish Balay 115228e30874SSatish Balay PetscFunctionBegin; 11539566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 11549566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 115528e30874SSatish Balay t = a->solve_work; 115628e30874SSatish Balay 11579371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 11589371c9d4SSatish Balay r = rout; 11599371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 11609371c9d4SSatish Balay c = cout; 116128e30874SSatish Balay 116228e30874SSatish Balay /* forward solve the lower triangular */ 116328e30874SSatish Balay idx = 2 * r[0]; 11649371c9d4SSatish Balay t[0] = b[idx]; 11659371c9d4SSatish Balay t[1] = b[1 + idx]; 116628e30874SSatish Balay for (i = 1; i < n; i++) { 116728e30874SSatish Balay v = aa + 4 * ai[i]; 116828e30874SSatish Balay vi = aj + ai[i]; 116928e30874SSatish Balay nz = ai[i + 1] - ai[i]; 117028e30874SSatish Balay idx = 2 * r[i]; 11719371c9d4SSatish Balay s1 = b[idx]; 11729371c9d4SSatish Balay s2 = b[1 + idx]; 117328e30874SSatish Balay for (m = 0; m < nz; m++) { 117428e30874SSatish Balay jdx = 2 * vi[m]; 11759371c9d4SSatish Balay x1 = t[jdx]; 11769371c9d4SSatish Balay x2 = t[1 + jdx]; 117728e30874SSatish Balay s1 -= v[0] * x1 + v[2] * x2; 117828e30874SSatish Balay s2 -= v[1] * x1 + v[3] * x2; 117928e30874SSatish Balay v += 4; 118028e30874SSatish Balay } 118128e30874SSatish Balay idx = 2 * i; 11829371c9d4SSatish Balay t[idx] = s1; 11839371c9d4SSatish Balay t[1 + idx] = s2; 118428e30874SSatish Balay } 118528e30874SSatish Balay /* backward solve the upper triangular */ 118628e30874SSatish Balay for (i = n - 1; i >= 0; i--) { 118728e30874SSatish Balay v = aa + 4 * (adiag[i + 1] + 1); 118828e30874SSatish Balay vi = aj + adiag[i + 1] + 1; 118928e30874SSatish Balay nz = adiag[i] - adiag[i + 1] - 1; 119028e30874SSatish Balay idt = 2 * i; 11919371c9d4SSatish Balay s1 = t[idt]; 11929371c9d4SSatish Balay s2 = t[1 + idt]; 119328e30874SSatish Balay for (m = 0; m < nz; m++) { 119428e30874SSatish Balay idx = 2 * vi[m]; 11959371c9d4SSatish Balay x1 = t[idx]; 11969371c9d4SSatish Balay x2 = t[1 + idx]; 119728e30874SSatish Balay s1 -= v[0] * x1 + v[2] * x2; 119828e30874SSatish Balay s2 -= v[1] * x1 + v[3] * x2; 119928e30874SSatish Balay v += 4; 120028e30874SSatish Balay } 120128e30874SSatish Balay idc = 2 * c[i]; 120228e30874SSatish Balay x[idc] = t[idt] = v[0] * s1 + v[2] * s2; 120328e30874SSatish Balay x[1 + idc] = t[1 + idt] = v[1] * s1 + v[3] * s2; 120428e30874SSatish Balay } 12059566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 12069566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 12079566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 12089566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 12099566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * 4 * (a->nz) - 2.0 * A->cmap->n)); 12103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 121128e30874SSatish Balay } 121228e30874SSatish Balay 1213d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_1_inplace(Mat A, Vec bb, Vec xx) 1214d71ae5a4SJacob Faibussowitsch { 121528e30874SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 121628e30874SSatish Balay IS iscol = a->col, isrow = a->row; 121728e30874SSatish Balay const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j; 121828e30874SSatish Balay PetscInt i, nz; 121928e30874SSatish Balay const PetscInt *r, *c, *diag = a->diag, *rout, *cout; 122028e30874SSatish Balay const MatScalar *aa = a->a, *v; 122128e30874SSatish Balay PetscScalar *x, s1, *t; 122228e30874SSatish Balay const PetscScalar *b; 122328e30874SSatish Balay 122428e30874SSatish Balay PetscFunctionBegin; 12253ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 122628e30874SSatish Balay 12279566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 12289566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 122928e30874SSatish Balay t = a->solve_work; 123028e30874SSatish Balay 12319371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 12329371c9d4SSatish Balay r = rout; 12339371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 12349371c9d4SSatish Balay c = cout + (n - 1); 123528e30874SSatish Balay 123628e30874SSatish Balay /* forward solve the lower triangular */ 123728e30874SSatish Balay t[0] = b[*r++]; 123828e30874SSatish Balay for (i = 1; i < n; i++) { 123928e30874SSatish Balay v = aa + ai[i]; 124028e30874SSatish Balay vi = aj + ai[i]; 124128e30874SSatish Balay nz = diag[i] - ai[i]; 124228e30874SSatish Balay s1 = b[*r++]; 1243ad540459SPierre Jolivet while (nz--) s1 -= (*v++) * t[*vi++]; 124428e30874SSatish Balay t[i] = s1; 124528e30874SSatish Balay } 124628e30874SSatish Balay /* backward solve the upper triangular */ 124728e30874SSatish Balay for (i = n - 1; i >= 0; i--) { 124828e30874SSatish Balay v = aa + diag[i] + 1; 124928e30874SSatish Balay vi = aj + diag[i] + 1; 125028e30874SSatish Balay nz = ai[i + 1] - diag[i] - 1; 125128e30874SSatish Balay s1 = t[i]; 1252ad540459SPierre Jolivet while (nz--) s1 -= (*v++) * t[*vi++]; 125328e30874SSatish Balay x[*c--] = t[i] = aa[diag[i]] * s1; 125428e30874SSatish Balay } 125528e30874SSatish Balay 12569566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 12579566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 12589566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 12599566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 12609566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * 1 * (a->nz) - A->cmap->n)); 12613ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 126228e30874SSatish Balay } 126328e30874SSatish Balay 1264d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_1(Mat A, Vec bb, Vec xx) 1265d71ae5a4SJacob Faibussowitsch { 126628e30874SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 126728e30874SSatish Balay IS iscol = a->col, isrow = a->row; 126828e30874SSatish Balay PetscInt i, n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz; 126928e30874SSatish Balay const PetscInt *rout, *cout, *r, *c; 127028e30874SSatish Balay PetscScalar *x, *tmp, sum; 127128e30874SSatish Balay const PetscScalar *b; 127228e30874SSatish Balay const MatScalar *aa = a->a, *v; 127328e30874SSatish Balay 127428e30874SSatish Balay PetscFunctionBegin; 12753ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 127628e30874SSatish Balay 12779566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 12789566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 127928e30874SSatish Balay tmp = a->solve_work; 128028e30874SSatish Balay 12819371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 12829371c9d4SSatish Balay r = rout; 12839371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 12849371c9d4SSatish Balay c = cout; 128528e30874SSatish Balay 128628e30874SSatish Balay /* forward solve the lower triangular */ 128728e30874SSatish Balay tmp[0] = b[r[0]]; 128828e30874SSatish Balay v = aa; 128928e30874SSatish Balay vi = aj; 129028e30874SSatish Balay for (i = 1; i < n; i++) { 129128e30874SSatish Balay nz = ai[i + 1] - ai[i]; 129228e30874SSatish Balay sum = b[r[i]]; 129328e30874SSatish Balay PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 129428e30874SSatish Balay tmp[i] = sum; 12959371c9d4SSatish Balay v += nz; 12969371c9d4SSatish Balay vi += nz; 129728e30874SSatish Balay } 129828e30874SSatish Balay 129928e30874SSatish Balay /* backward solve the upper triangular */ 130028e30874SSatish Balay for (i = n - 1; i >= 0; i--) { 130128e30874SSatish Balay v = aa + adiag[i + 1] + 1; 130228e30874SSatish Balay vi = aj + adiag[i + 1] + 1; 130328e30874SSatish Balay nz = adiag[i] - adiag[i + 1] - 1; 130428e30874SSatish Balay sum = tmp[i]; 130528e30874SSatish Balay PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 130628e30874SSatish Balay x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 130728e30874SSatish Balay } 130828e30874SSatish Balay 13099566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 13109566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 13119566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 13129566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 13139566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 13143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 131528e30874SSatish Balay } 1316