1c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h> 2af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h> 381278733SSatish Balay 481278733SSatish Balay /* 581278733SSatish Balay Version for when blocks are 7 by 7 Using natural ordering 681278733SSatish Balay */ 7d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info) 8d71ae5a4SJacob Faibussowitsch { 981278733SSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 1013f74950SBarry Smith PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j; 1113f74950SBarry Smith PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 1281278733SSatish Balay MatScalar *ba = b->a, *aa, *ap, *dk, *uik; 131b3064deSBarry Smith MatScalar *u, *d, *w, *wp, u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12; 141b3064deSBarry Smith MatScalar u13, u14, u15, u16, u17, u18, u19, u20, u21, u22, u23, u24, u25, u26, u27; 151b3064deSBarry Smith MatScalar u28, u29, u30, u31, u32, u33, u34, u35, u36, u37, u38, u39, u40, u41; 161b3064deSBarry Smith MatScalar u42, u43, u44, u45, u46, u47, u48; 17182b8fbaSHong Zhang PetscReal shift = info->shiftamount; 18a455e926SHong Zhang PetscBool allowzeropivot, zeropivotdetected; 1981278733SSatish Balay 2081278733SSatish Balay PetscFunctionBegin; 2181278733SSatish Balay /* initialization */ 220164db54SHong Zhang allowzeropivot = PetscNot(A->erroriffailure); 239566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(49 * mbs, &w)); 249566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 256df5ee2eSHong Zhang il[0] = 0; 266df5ee2eSHong Zhang for (i = 0; i < mbs; i++) jl[i] = mbs; 276df5ee2eSHong Zhang 289566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(49, &dk, 49, &uik)); 299371c9d4SSatish Balay ai = a->i; 309371c9d4SSatish Balay aj = a->j; 319371c9d4SSatish Balay aa = a->a; 3281278733SSatish Balay 3381278733SSatish Balay /* for each row k */ 3481278733SSatish Balay for (k = 0; k < mbs; k++) { 3581278733SSatish Balay /*initialize k-th row with elements nonzero in row k of A */ 369371c9d4SSatish Balay jmin = ai[k]; 379371c9d4SSatish Balay jmax = ai[k + 1]; 3881278733SSatish Balay if (jmin < jmax) { 3981278733SSatish Balay ap = aa + jmin * 49; 4081278733SSatish Balay for (j = jmin; j < jmax; j++) { 4181278733SSatish Balay vj = aj[j]; /* block col. index */ 4281278733SSatish Balay wp = w + vj * 49; 439566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(wp, ap, 49)); 44580bdb30SBarry Smith ap += 49; 4581278733SSatish Balay } 4681278733SSatish Balay } 4781278733SSatish Balay 4881278733SSatish Balay /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 499566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(dk, w + k * 49, 49)); 5081278733SSatish Balay i = jl[k]; /* first row to be added to k_th row */ 5181278733SSatish Balay 5281278733SSatish Balay while (i < mbs) { 5381278733SSatish Balay nexti = jl[i]; /* next row to be added to k_th row */ 5481278733SSatish Balay 5581278733SSatish Balay /* compute multiplier */ 5681278733SSatish Balay ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 5781278733SSatish Balay 5881278733SSatish Balay /* uik = -inv(Di)*U_bar(i,k) */ 5981278733SSatish Balay d = ba + i * 49; 6081278733SSatish Balay u = ba + ili * 49; 6181278733SSatish Balay 629371c9d4SSatish Balay u0 = u[0]; 639371c9d4SSatish Balay u1 = u[1]; 649371c9d4SSatish Balay u2 = u[2]; 659371c9d4SSatish Balay u3 = u[3]; 669371c9d4SSatish Balay u4 = u[4]; 679371c9d4SSatish Balay u5 = u[5]; 689371c9d4SSatish Balay u6 = u[6]; 699371c9d4SSatish Balay u7 = u[7]; 709371c9d4SSatish Balay u8 = u[8]; 719371c9d4SSatish Balay u9 = u[9]; 729371c9d4SSatish Balay u10 = u[10]; 739371c9d4SSatish Balay u11 = u[11]; 749371c9d4SSatish Balay u12 = u[12]; 759371c9d4SSatish Balay u13 = u[13]; 769371c9d4SSatish Balay u14 = u[14]; 779371c9d4SSatish Balay u15 = u[15]; 789371c9d4SSatish Balay u16 = u[16]; 799371c9d4SSatish Balay u17 = u[17]; 809371c9d4SSatish Balay u18 = u[18]; 819371c9d4SSatish Balay u19 = u[19]; 829371c9d4SSatish Balay u20 = u[20]; 839371c9d4SSatish Balay u21 = u[21]; 849371c9d4SSatish Balay u22 = u[22]; 859371c9d4SSatish Balay u23 = u[23]; 869371c9d4SSatish Balay u24 = u[24]; 879371c9d4SSatish Balay u25 = u[25]; 889371c9d4SSatish Balay u26 = u[26]; 899371c9d4SSatish Balay u27 = u[27]; 909371c9d4SSatish Balay u28 = u[28]; 919371c9d4SSatish Balay u29 = u[29]; 929371c9d4SSatish Balay u30 = u[30]; 939371c9d4SSatish Balay u31 = u[31]; 949371c9d4SSatish Balay u32 = u[32]; 959371c9d4SSatish Balay u33 = u[33]; 969371c9d4SSatish Balay u34 = u[34]; 979371c9d4SSatish Balay u35 = u[35]; 989371c9d4SSatish Balay u36 = u[36]; 999371c9d4SSatish Balay u37 = u[37]; 1009371c9d4SSatish Balay u38 = u[38]; 1019371c9d4SSatish Balay u39 = u[39]; 1029371c9d4SSatish Balay u40 = u[40]; 1039371c9d4SSatish Balay u41 = u[41]; 1049371c9d4SSatish Balay u42 = u[42]; 1059371c9d4SSatish Balay u43 = u[43]; 1069371c9d4SSatish Balay u44 = u[44]; 1079371c9d4SSatish Balay u45 = u[45]; 1089371c9d4SSatish Balay u46 = u[46]; 1099371c9d4SSatish Balay u47 = u[47]; 1109371c9d4SSatish Balay u48 = u[48]; 11181278733SSatish Balay 1121b3064deSBarry Smith uik[0] = -(d[0] * u0 + d[7] * u1 + d[14] * u2 + d[21] * u3 + d[28] * u4 + d[35] * u5 + d[42] * u6); 1131b3064deSBarry Smith uik[1] = -(d[1] * u0 + d[8] * u1 + d[15] * u2 + d[22] * u3 + d[29] * u4 + d[36] * u5 + d[43] * u6); 1141b3064deSBarry Smith uik[2] = -(d[2] * u0 + d[9] * u1 + d[16] * u2 + d[23] * u3 + d[30] * u4 + d[37] * u5 + d[44] * u6); 1151b3064deSBarry Smith uik[3] = -(d[3] * u0 + d[10] * u1 + d[17] * u2 + d[24] * u3 + d[31] * u4 + d[38] * u5 + d[45] * u6); 1161b3064deSBarry Smith uik[4] = -(d[4] * u0 + d[11] * u1 + d[18] * u2 + d[25] * u3 + d[32] * u4 + d[39] * u5 + d[46] * u6); 1171b3064deSBarry Smith uik[5] = -(d[5] * u0 + d[12] * u1 + d[19] * u2 + d[26] * u3 + d[33] * u4 + d[40] * u5 + d[47] * u6); 1181b3064deSBarry Smith uik[6] = -(d[6] * u0 + d[13] * u1 + d[20] * u2 + d[27] * u3 + d[34] * u4 + d[41] * u5 + d[48] * u6); 11981278733SSatish Balay 1201b3064deSBarry Smith uik[7] = -(d[0] * u7 + d[7] * u8 + d[14] * u9 + d[21] * u10 + d[28] * u11 + d[35] * u12 + d[42] * u13); 1211b3064deSBarry Smith uik[8] = -(d[1] * u7 + d[8] * u8 + d[15] * u9 + d[22] * u10 + d[29] * u11 + d[36] * u12 + d[43] * u13); 1221b3064deSBarry Smith uik[9] = -(d[2] * u7 + d[9] * u8 + d[16] * u9 + d[23] * u10 + d[30] * u11 + d[37] * u12 + d[44] * u13); 1231b3064deSBarry Smith uik[10] = -(d[3] * u7 + d[10] * u8 + d[17] * u9 + d[24] * u10 + d[31] * u11 + d[38] * u12 + d[45] * u13); 1241b3064deSBarry Smith uik[11] = -(d[4] * u7 + d[11] * u8 + d[18] * u9 + d[25] * u10 + d[32] * u11 + d[39] * u12 + d[46] * u13); 1251b3064deSBarry Smith uik[12] = -(d[5] * u7 + d[12] * u8 + d[19] * u9 + d[26] * u10 + d[33] * u11 + d[40] * u12 + d[47] * u13); 1261b3064deSBarry Smith uik[13] = -(d[6] * u7 + d[13] * u8 + d[20] * u9 + d[27] * u10 + d[34] * u11 + d[41] * u12 + d[48] * u13); 12781278733SSatish Balay 1281b3064deSBarry Smith uik[14] = -(d[0] * u14 + d[7] * u15 + d[14] * u16 + d[21] * u17 + d[28] * u18 + d[35] * u19 + d[42] * u20); 1291b3064deSBarry Smith uik[15] = -(d[1] * u14 + d[8] * u15 + d[15] * u16 + d[22] * u17 + d[29] * u18 + d[36] * u19 + d[43] * u20); 1301b3064deSBarry Smith uik[16] = -(d[2] * u14 + d[9] * u15 + d[16] * u16 + d[23] * u17 + d[30] * u18 + d[37] * u19 + d[44] * u20); 1311b3064deSBarry Smith uik[17] = -(d[3] * u14 + d[10] * u15 + d[17] * u16 + d[24] * u17 + d[31] * u18 + d[38] * u19 + d[45] * u20); 1321b3064deSBarry Smith uik[18] = -(d[4] * u14 + d[11] * u15 + d[18] * u16 + d[25] * u17 + d[32] * u18 + d[39] * u19 + d[46] * u20); 1331b3064deSBarry Smith uik[19] = -(d[5] * u14 + d[12] * u15 + d[19] * u16 + d[26] * u17 + d[33] * u18 + d[40] * u19 + d[47] * u20); 1341b3064deSBarry Smith uik[20] = -(d[6] * u14 + d[13] * u15 + d[20] * u16 + d[27] * u17 + d[34] * u18 + d[41] * u19 + d[48] * u20); 13581278733SSatish Balay 1361b3064deSBarry Smith uik[21] = -(d[0] * u21 + d[7] * u22 + d[14] * u23 + d[21] * u24 + d[28] * u25 + d[35] * u26 + d[42] * u27); 1371b3064deSBarry Smith uik[22] = -(d[1] * u21 + d[8] * u22 + d[15] * u23 + d[22] * u24 + d[29] * u25 + d[36] * u26 + d[43] * u27); 1381b3064deSBarry Smith uik[23] = -(d[2] * u21 + d[9] * u22 + d[16] * u23 + d[23] * u24 + d[30] * u25 + d[37] * u26 + d[44] * u27); 1391b3064deSBarry Smith uik[24] = -(d[3] * u21 + d[10] * u22 + d[17] * u23 + d[24] * u24 + d[31] * u25 + d[38] * u26 + d[45] * u27); 1401b3064deSBarry Smith uik[25] = -(d[4] * u21 + d[11] * u22 + d[18] * u23 + d[25] * u24 + d[32] * u25 + d[39] * u26 + d[46] * u27); 1411b3064deSBarry Smith uik[26] = -(d[5] * u21 + d[12] * u22 + d[19] * u23 + d[26] * u24 + d[33] * u25 + d[40] * u26 + d[47] * u27); 1421b3064deSBarry Smith uik[27] = -(d[6] * u21 + d[13] * u22 + d[20] * u23 + d[27] * u24 + d[34] * u25 + d[41] * u26 + d[48] * u27); 14381278733SSatish Balay 1441b3064deSBarry Smith uik[28] = -(d[0] * u28 + d[7] * u29 + d[14] * u30 + d[21] * u31 + d[28] * u32 + d[35] * u33 + d[42] * u34); 1451b3064deSBarry Smith uik[29] = -(d[1] * u28 + d[8] * u29 + d[15] * u30 + d[22] * u31 + d[29] * u32 + d[36] * u33 + d[43] * u34); 1461b3064deSBarry Smith uik[30] = -(d[2] * u28 + d[9] * u29 + d[16] * u30 + d[23] * u31 + d[30] * u32 + d[37] * u33 + d[44] * u34); 1471b3064deSBarry Smith uik[31] = -(d[3] * u28 + d[10] * u29 + d[17] * u30 + d[24] * u31 + d[31] * u32 + d[38] * u33 + d[45] * u34); 1481b3064deSBarry Smith uik[32] = -(d[4] * u28 + d[11] * u29 + d[18] * u30 + d[25] * u31 + d[32] * u32 + d[39] * u33 + d[46] * u34); 1491b3064deSBarry Smith uik[33] = -(d[5] * u28 + d[12] * u29 + d[19] * u30 + d[26] * u31 + d[33] * u32 + d[40] * u33 + d[47] * u34); 1501b3064deSBarry Smith uik[34] = -(d[6] * u28 + d[13] * u29 + d[20] * u30 + d[27] * u31 + d[34] * u32 + d[41] * u33 + d[48] * u34); 15181278733SSatish Balay 1521b3064deSBarry Smith uik[35] = -(d[0] * u35 + d[7] * u36 + d[14] * u37 + d[21] * u38 + d[28] * u39 + d[35] * u40 + d[42] * u41); 1531b3064deSBarry Smith uik[36] = -(d[1] * u35 + d[8] * u36 + d[15] * u37 + d[22] * u38 + d[29] * u39 + d[36] * u40 + d[43] * u41); 1541b3064deSBarry Smith uik[37] = -(d[2] * u35 + d[9] * u36 + d[16] * u37 + d[23] * u38 + d[30] * u39 + d[37] * u40 + d[44] * u41); 1551b3064deSBarry Smith uik[38] = -(d[3] * u35 + d[10] * u36 + d[17] * u37 + d[24] * u38 + d[31] * u39 + d[38] * u40 + d[45] * u41); 1561b3064deSBarry Smith uik[39] = -(d[4] * u35 + d[11] * u36 + d[18] * u37 + d[25] * u38 + d[32] * u39 + d[39] * u40 + d[46] * u41); 1571b3064deSBarry Smith uik[40] = -(d[5] * u35 + d[12] * u36 + d[19] * u37 + d[26] * u38 + d[33] * u39 + d[40] * u40 + d[47] * u41); 1581b3064deSBarry Smith uik[41] = -(d[6] * u35 + d[13] * u36 + d[20] * u37 + d[27] * u38 + d[34] * u39 + d[41] * u40 + d[48] * u41); 1591b3064deSBarry Smith 1601b3064deSBarry Smith uik[42] = -(d[0] * u42 + d[7] * u43 + d[14] * u44 + d[21] * u45 + d[28] * u46 + d[35] * u47 + d[42] * u48); 1611b3064deSBarry Smith uik[43] = -(d[1] * u42 + d[8] * u43 + d[15] * u44 + d[22] * u45 + d[29] * u46 + d[36] * u47 + d[43] * u48); 1621b3064deSBarry Smith uik[44] = -(d[2] * u42 + d[9] * u43 + d[16] * u44 + d[23] * u45 + d[30] * u46 + d[37] * u47 + d[44] * u48); 1631b3064deSBarry Smith uik[45] = -(d[3] * u42 + d[10] * u43 + d[17] * u44 + d[24] * u45 + d[31] * u46 + d[38] * u47 + d[45] * u48); 1641b3064deSBarry Smith uik[46] = -(d[4] * u42 + d[11] * u43 + d[18] * u44 + d[25] * u45 + d[32] * u46 + d[39] * u47 + d[46] * u48); 1651b3064deSBarry Smith uik[47] = -(d[5] * u42 + d[12] * u43 + d[19] * u44 + d[26] * u45 + d[33] * u46 + d[40] * u47 + d[47] * u48); 1661b3064deSBarry Smith uik[48] = -(d[6] * u42 + d[13] * u43 + d[20] * u44 + d[27] * u45 + d[34] * u46 + d[41] * u47 + d[48] * u48); 16781278733SSatish Balay 16881278733SSatish Balay /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 1691b3064deSBarry Smith dk[0] += uik[0] * u0 + uik[1] * u1 + uik[2] * u2 + uik[3] * u3 + uik[4] * u4 + uik[5] * u5 + uik[6] * u6; 1701b3064deSBarry Smith dk[1] += uik[7] * u0 + uik[8] * u1 + uik[9] * u2 + uik[10] * u3 + uik[11] * u4 + uik[12] * u5 + uik[13] * u6; 1711b3064deSBarry Smith dk[2] += uik[14] * u0 + uik[15] * u1 + uik[16] * u2 + uik[17] * u3 + uik[18] * u4 + uik[19] * u5 + uik[20] * u6; 1721b3064deSBarry Smith dk[3] += uik[21] * u0 + uik[22] * u1 + uik[23] * u2 + uik[24] * u3 + uik[25] * u4 + uik[26] * u5 + uik[27] * u6; 1731b3064deSBarry Smith dk[4] += uik[28] * u0 + uik[29] * u1 + uik[30] * u2 + uik[31] * u3 + uik[32] * u4 + uik[33] * u5 + uik[34] * u6; 1741b3064deSBarry Smith dk[5] += uik[35] * u0 + uik[36] * u1 + uik[37] * u2 + uik[38] * u3 + uik[39] * u4 + uik[40] * u5 + uik[41] * u6; 1751b3064deSBarry Smith dk[6] += uik[42] * u0 + uik[43] * u1 + uik[44] * u2 + uik[45] * u3 + uik[46] * u4 + uik[47] * u5 + uik[48] * u6; 17681278733SSatish Balay 1771b3064deSBarry Smith dk[7] += uik[0] * u7 + uik[1] * u8 + uik[2] * u9 + uik[3] * u10 + uik[4] * u11 + uik[5] * u12 + uik[6] * u13; 1781b3064deSBarry Smith dk[8] += uik[7] * u7 + uik[8] * u8 + uik[9] * u9 + uik[10] * u10 + uik[11] * u11 + uik[12] * u12 + uik[13] * u13; 1791b3064deSBarry Smith dk[9] += uik[14] * u7 + uik[15] * u8 + uik[16] * u9 + uik[17] * u10 + uik[18] * u11 + uik[19] * u12 + uik[20] * u13; 1801b3064deSBarry Smith dk[10] += uik[21] * u7 + uik[22] * u8 + uik[23] * u9 + uik[24] * u10 + uik[25] * u11 + uik[26] * u12 + uik[27] * u13; 1811b3064deSBarry Smith dk[11] += uik[28] * u7 + uik[29] * u8 + uik[30] * u9 + uik[31] * u10 + uik[32] * u11 + uik[33] * u12 + uik[34] * u13; 1821b3064deSBarry Smith dk[12] += uik[35] * u7 + uik[36] * u8 + uik[37] * u9 + uik[38] * u10 + uik[39] * u11 + uik[40] * u12 + uik[41] * u13; 1831b3064deSBarry Smith dk[13] += uik[42] * u7 + uik[43] * u8 + uik[44] * u9 + uik[45] * u10 + uik[46] * u11 + uik[47] * u12 + uik[48] * u13; 18481278733SSatish Balay 1851b3064deSBarry Smith dk[14] += uik[0] * u14 + uik[1] * u15 + uik[2] * u16 + uik[3] * u17 + uik[4] * u18 + uik[5] * u19 + uik[6] * u20; 1861b3064deSBarry Smith dk[15] += uik[7] * u14 + uik[8] * u15 + uik[9] * u16 + uik[10] * u17 + uik[11] * u18 + uik[12] * u19 + uik[13] * u20; 1871b3064deSBarry Smith dk[16] += uik[14] * u14 + uik[15] * u15 + uik[16] * u16 + uik[17] * u17 + uik[18] * u18 + uik[19] * u19 + uik[20] * u20; 1881b3064deSBarry Smith dk[17] += uik[21] * u14 + uik[22] * u15 + uik[23] * u16 + uik[24] * u17 + uik[25] * u18 + uik[26] * u19 + uik[27] * u20; 1891b3064deSBarry Smith dk[18] += uik[28] * u14 + uik[29] * u15 + uik[30] * u16 + uik[31] * u17 + uik[32] * u18 + uik[33] * u19 + uik[34] * u20; 1901b3064deSBarry Smith dk[19] += uik[35] * u14 + uik[36] * u15 + uik[37] * u16 + uik[38] * u17 + uik[39] * u18 + uik[40] * u19 + uik[41] * u20; 1911b3064deSBarry Smith dk[20] += uik[42] * u14 + uik[43] * u15 + uik[44] * u16 + uik[45] * u17 + uik[46] * u18 + uik[47] * u19 + uik[48] * u20; 19281278733SSatish Balay 1931b3064deSBarry Smith dk[21] += uik[0] * u21 + uik[1] * u22 + uik[2] * u23 + uik[3] * u24 + uik[4] * u25 + uik[5] * u26 + uik[6] * u27; 1941b3064deSBarry Smith dk[22] += uik[7] * u21 + uik[8] * u22 + uik[9] * u23 + uik[10] * u24 + uik[11] * u25 + uik[12] * u26 + uik[13] * u27; 1951b3064deSBarry Smith dk[23] += uik[14] * u21 + uik[15] * u22 + uik[16] * u23 + uik[17] * u24 + uik[18] * u25 + uik[19] * u26 + uik[20] * u27; 1961b3064deSBarry Smith dk[24] += uik[21] * u21 + uik[22] * u22 + uik[23] * u23 + uik[24] * u24 + uik[25] * u25 + uik[26] * u26 + uik[27] * u27; 1971b3064deSBarry Smith dk[25] += uik[28] * u21 + uik[29] * u22 + uik[30] * u23 + uik[31] * u24 + uik[32] * u25 + uik[33] * u26 + uik[34] * u27; 1981b3064deSBarry Smith dk[26] += uik[35] * u21 + uik[36] * u22 + uik[37] * u23 + uik[38] * u24 + uik[39] * u25 + uik[40] * u26 + uik[41] * u27; 1991b3064deSBarry Smith dk[27] += uik[42] * u21 + uik[43] * u22 + uik[44] * u23 + uik[45] * u24 + uik[46] * u25 + uik[47] * u26 + uik[48] * u27; 20081278733SSatish Balay 2011b3064deSBarry Smith dk[28] += uik[0] * u28 + uik[1] * u29 + uik[2] * u30 + uik[3] * u31 + uik[4] * u32 + uik[5] * u33 + uik[6] * u34; 2021b3064deSBarry Smith dk[29] += uik[7] * u28 + uik[8] * u29 + uik[9] * u30 + uik[10] * u31 + uik[11] * u32 + uik[12] * u33 + uik[13] * u34; 2031b3064deSBarry Smith dk[30] += uik[14] * u28 + uik[15] * u29 + uik[16] * u30 + uik[17] * u31 + uik[18] * u32 + uik[19] * u33 + uik[20] * u34; 2041b3064deSBarry Smith dk[31] += uik[21] * u28 + uik[22] * u29 + uik[23] * u30 + uik[24] * u31 + uik[25] * u32 + uik[26] * u33 + uik[27] * u34; 2051b3064deSBarry Smith dk[32] += uik[28] * u28 + uik[29] * u29 + uik[30] * u30 + uik[31] * u31 + uik[32] * u32 + uik[33] * u33 + uik[34] * u34; 2061b3064deSBarry Smith dk[33] += uik[35] * u28 + uik[36] * u29 + uik[37] * u30 + uik[38] * u31 + uik[39] * u32 + uik[40] * u33 + uik[41] * u34; 2071b3064deSBarry Smith dk[34] += uik[42] * u28 + uik[43] * u29 + uik[44] * u30 + uik[45] * u31 + uik[46] * u32 + uik[47] * u33 + uik[48] * u34; 20881278733SSatish Balay 2091b3064deSBarry Smith dk[35] += uik[0] * u35 + uik[1] * u36 + uik[2] * u37 + uik[3] * u38 + uik[4] * u39 + uik[5] * u40 + uik[6] * u41; 2101b3064deSBarry Smith dk[36] += uik[7] * u35 + uik[8] * u36 + uik[9] * u37 + uik[10] * u38 + uik[11] * u39 + uik[12] * u40 + uik[13] * u41; 2111b3064deSBarry Smith dk[37] += uik[14] * u35 + uik[15] * u36 + uik[16] * u37 + uik[17] * u38 + uik[18] * u39 + uik[19] * u40 + uik[20] * u41; 2121b3064deSBarry Smith dk[38] += uik[21] * u35 + uik[22] * u36 + uik[23] * u37 + uik[24] * u38 + uik[25] * u39 + uik[26] * u40 + uik[27] * u41; 2131b3064deSBarry Smith dk[39] += uik[28] * u35 + uik[29] * u36 + uik[30] * u37 + uik[31] * u38 + uik[32] * u39 + uik[33] * u40 + uik[34] * u41; 2141b3064deSBarry Smith dk[40] += uik[35] * u35 + uik[36] * u36 + uik[37] * u37 + uik[38] * u38 + uik[39] * u39 + uik[40] * u40 + uik[41] * u41; 2151b3064deSBarry Smith dk[41] += uik[42] * u35 + uik[43] * u36 + uik[44] * u37 + uik[45] * u38 + uik[46] * u39 + uik[47] * u40 + uik[48] * u41; 21681278733SSatish Balay 2171b3064deSBarry Smith dk[42] += uik[0] * u42 + uik[1] * u43 + uik[2] * u44 + uik[3] * u45 + uik[4] * u46 + uik[5] * u47 + uik[6] * u48; 2181b3064deSBarry Smith dk[43] += uik[7] * u42 + uik[8] * u43 + uik[9] * u44 + uik[10] * u45 + uik[11] * u46 + uik[12] * u47 + uik[13] * u48; 2191b3064deSBarry Smith dk[44] += uik[14] * u42 + uik[15] * u43 + uik[16] * u44 + uik[17] * u45 + uik[18] * u46 + uik[19] * u47 + uik[20] * u48; 2201b3064deSBarry Smith dk[45] += uik[21] * u42 + uik[22] * u43 + uik[23] * u44 + uik[24] * u45 + uik[25] * u46 + uik[26] * u47 + uik[27] * u48; 2211b3064deSBarry Smith dk[46] += uik[28] * u42 + uik[29] * u43 + uik[30] * u44 + uik[31] * u45 + uik[32] * u46 + uik[33] * u47 + uik[34] * u48; 2221b3064deSBarry Smith dk[47] += uik[35] * u42 + uik[36] * u43 + uik[37] * u44 + uik[38] * u45 + uik[39] * u46 + uik[40] * u47 + uik[41] * u48; 2231b3064deSBarry Smith dk[48] += uik[42] * u42 + uik[43] * u43 + uik[44] * u44 + uik[45] * u45 + uik[46] * u46 + uik[47] * u47 + uik[48] * u48; 22481278733SSatish Balay 2259566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(343.0 * 4.0)); 226187a9f4bSHong Zhang 22781278733SSatish Balay /* update -U(i,k) */ 2289566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(ba + ili * 49, uik, 49)); 22981278733SSatish Balay 23081278733SSatish Balay /* add multiple of row i to k-th row ... */ 2319371c9d4SSatish Balay jmin = ili + 1; 2329371c9d4SSatish Balay jmax = bi[i + 1]; 23381278733SSatish Balay if (jmin < jmax) { 23481278733SSatish Balay for (j = jmin; j < jmax; j++) { 23581278733SSatish Balay /* w += -U(i,k)^T * U_bar(i,j) */ 23681278733SSatish Balay wp = w + bj[j] * 49; 23781278733SSatish Balay u = ba + j * 49; 23881278733SSatish Balay 2399371c9d4SSatish Balay u0 = u[0]; 2409371c9d4SSatish Balay u1 = u[1]; 2419371c9d4SSatish Balay u2 = u[2]; 2429371c9d4SSatish Balay u3 = u[3]; 2439371c9d4SSatish Balay u4 = u[4]; 2449371c9d4SSatish Balay u5 = u[5]; 2459371c9d4SSatish Balay u6 = u[6]; 2469371c9d4SSatish Balay u7 = u[7]; 2479371c9d4SSatish Balay u8 = u[8]; 2489371c9d4SSatish Balay u9 = u[9]; 2499371c9d4SSatish Balay u10 = u[10]; 2509371c9d4SSatish Balay u11 = u[11]; 2519371c9d4SSatish Balay u12 = u[12]; 2529371c9d4SSatish Balay u13 = u[13]; 2539371c9d4SSatish Balay u14 = u[14]; 2549371c9d4SSatish Balay u15 = u[15]; 2559371c9d4SSatish Balay u16 = u[16]; 2569371c9d4SSatish Balay u17 = u[17]; 2579371c9d4SSatish Balay u18 = u[18]; 2589371c9d4SSatish Balay u19 = u[19]; 2599371c9d4SSatish Balay u20 = u[20]; 2609371c9d4SSatish Balay u21 = u[21]; 2619371c9d4SSatish Balay u22 = u[22]; 2629371c9d4SSatish Balay u23 = u[23]; 2639371c9d4SSatish Balay u24 = u[24]; 2649371c9d4SSatish Balay u25 = u[25]; 2659371c9d4SSatish Balay u26 = u[26]; 2669371c9d4SSatish Balay u27 = u[27]; 2679371c9d4SSatish Balay u28 = u[28]; 2689371c9d4SSatish Balay u29 = u[29]; 2699371c9d4SSatish Balay u30 = u[30]; 2709371c9d4SSatish Balay u31 = u[31]; 2719371c9d4SSatish Balay u32 = u[32]; 2729371c9d4SSatish Balay u33 = u[33]; 2739371c9d4SSatish Balay u34 = u[34]; 2749371c9d4SSatish Balay u35 = u[35]; 2759371c9d4SSatish Balay u36 = u[36]; 2769371c9d4SSatish Balay u37 = u[37]; 2779371c9d4SSatish Balay u38 = u[38]; 2789371c9d4SSatish Balay u39 = u[39]; 2799371c9d4SSatish Balay u40 = u[40]; 2809371c9d4SSatish Balay u41 = u[41]; 2819371c9d4SSatish Balay u42 = u[42]; 2829371c9d4SSatish Balay u43 = u[43]; 2839371c9d4SSatish Balay u44 = u[44]; 2849371c9d4SSatish Balay u45 = u[45]; 2859371c9d4SSatish Balay u46 = u[46]; 2869371c9d4SSatish Balay u47 = u[47]; 2879371c9d4SSatish Balay u48 = u[48]; 28881278733SSatish Balay 2891b3064deSBarry Smith wp[0] += uik[0] * u0 + uik[1] * u1 + uik[2] * u2 + uik[3] * u3 + uik[4] * u4 + uik[5] * u5 + uik[6] * u6; 2901b3064deSBarry Smith wp[1] += uik[7] * u0 + uik[8] * u1 + uik[9] * u2 + uik[10] * u3 + uik[11] * u4 + uik[12] * u5 + uik[13] * u6; 2911b3064deSBarry Smith wp[2] += uik[14] * u0 + uik[15] * u1 + uik[16] * u2 + uik[17] * u3 + uik[18] * u4 + uik[19] * u5 + uik[20] * u6; 2921b3064deSBarry Smith wp[3] += uik[21] * u0 + uik[22] * u1 + uik[23] * u2 + uik[24] * u3 + uik[25] * u4 + uik[26] * u5 + uik[27] * u6; 2931b3064deSBarry Smith wp[4] += uik[28] * u0 + uik[29] * u1 + uik[30] * u2 + uik[31] * u3 + uik[32] * u4 + uik[33] * u5 + uik[34] * u6; 2941b3064deSBarry Smith wp[5] += uik[35] * u0 + uik[36] * u1 + uik[37] * u2 + uik[38] * u3 + uik[39] * u4 + uik[40] * u5 + uik[41] * u6; 2951b3064deSBarry Smith wp[6] += uik[42] * u0 + uik[43] * u1 + uik[44] * u2 + uik[45] * u3 + uik[46] * u4 + uik[47] * u5 + uik[48] * u6; 29681278733SSatish Balay 2971b3064deSBarry Smith wp[7] += uik[0] * u7 + uik[1] * u8 + uik[2] * u9 + uik[3] * u10 + uik[4] * u11 + uik[5] * u12 + uik[6] * u13; 2981b3064deSBarry Smith wp[8] += uik[7] * u7 + uik[8] * u8 + uik[9] * u9 + uik[10] * u10 + uik[11] * u11 + uik[12] * u12 + uik[13] * u13; 2991b3064deSBarry Smith wp[9] += uik[14] * u7 + uik[15] * u8 + uik[16] * u9 + uik[17] * u10 + uik[18] * u11 + uik[19] * u12 + uik[20] * u13; 3001b3064deSBarry Smith wp[10] += uik[21] * u7 + uik[22] * u8 + uik[23] * u9 + uik[24] * u10 + uik[25] * u11 + uik[26] * u12 + uik[27] * u13; 3011b3064deSBarry Smith wp[11] += uik[28] * u7 + uik[29] * u8 + uik[30] * u9 + uik[31] * u10 + uik[32] * u11 + uik[33] * u12 + uik[34] * u13; 3021b3064deSBarry Smith wp[12] += uik[35] * u7 + uik[36] * u8 + uik[37] * u9 + uik[38] * u10 + uik[39] * u11 + uik[40] * u12 + uik[41] * u13; 3031b3064deSBarry Smith wp[13] += uik[42] * u7 + uik[43] * u8 + uik[44] * u9 + uik[45] * u10 + uik[46] * u11 + uik[47] * u12 + uik[48] * u13; 30481278733SSatish Balay 3051b3064deSBarry Smith wp[14] += uik[0] * u14 + uik[1] * u15 + uik[2] * u16 + uik[3] * u17 + uik[4] * u18 + uik[5] * u19 + uik[6] * u20; 3061b3064deSBarry Smith wp[15] += uik[7] * u14 + uik[8] * u15 + uik[9] * u16 + uik[10] * u17 + uik[11] * u18 + uik[12] * u19 + uik[13] * u20; 3071b3064deSBarry Smith wp[16] += uik[14] * u14 + uik[15] * u15 + uik[16] * u16 + uik[17] * u17 + uik[18] * u18 + uik[19] * u19 + uik[20] * u20; 3081b3064deSBarry Smith wp[17] += uik[21] * u14 + uik[22] * u15 + uik[23] * u16 + uik[24] * u17 + uik[25] * u18 + uik[26] * u19 + uik[27] * u20; 3091b3064deSBarry Smith wp[18] += uik[28] * u14 + uik[29] * u15 + uik[30] * u16 + uik[31] * u17 + uik[32] * u18 + uik[33] * u19 + uik[34] * u20; 3101b3064deSBarry Smith wp[19] += uik[35] * u14 + uik[36] * u15 + uik[37] * u16 + uik[38] * u17 + uik[39] * u18 + uik[40] * u19 + uik[41] * u20; 3111b3064deSBarry Smith wp[20] += uik[42] * u14 + uik[43] * u15 + uik[44] * u16 + uik[45] * u17 + uik[46] * u18 + uik[47] * u19 + uik[48] * u20; 31281278733SSatish Balay 3131b3064deSBarry Smith wp[21] += uik[0] * u21 + uik[1] * u22 + uik[2] * u23 + uik[3] * u24 + uik[4] * u25 + uik[5] * u26 + uik[6] * u27; 3141b3064deSBarry Smith wp[22] += uik[7] * u21 + uik[8] * u22 + uik[9] * u23 + uik[10] * u24 + uik[11] * u25 + uik[12] * u26 + uik[13] * u27; 3151b3064deSBarry Smith wp[23] += uik[14] * u21 + uik[15] * u22 + uik[16] * u23 + uik[17] * u24 + uik[18] * u25 + uik[19] * u26 + uik[20] * u27; 3161b3064deSBarry Smith wp[24] += uik[21] * u21 + uik[22] * u22 + uik[23] * u23 + uik[24] * u24 + uik[25] * u25 + uik[26] * u26 + uik[27] * u27; 3171b3064deSBarry Smith wp[25] += uik[28] * u21 + uik[29] * u22 + uik[30] * u23 + uik[31] * u24 + uik[32] * u25 + uik[33] * u26 + uik[34] * u27; 3181b3064deSBarry Smith wp[26] += uik[35] * u21 + uik[36] * u22 + uik[37] * u23 + uik[38] * u24 + uik[39] * u25 + uik[40] * u26 + uik[41] * u27; 3191b3064deSBarry Smith wp[27] += uik[42] * u21 + uik[43] * u22 + uik[44] * u23 + uik[45] * u24 + uik[46] * u25 + uik[47] * u26 + uik[48] * u27; 32081278733SSatish Balay 3211b3064deSBarry Smith wp[28] += uik[0] * u28 + uik[1] * u29 + uik[2] * u30 + uik[3] * u31 + uik[4] * u32 + uik[5] * u33 + uik[6] * u34; 3221b3064deSBarry Smith wp[29] += uik[7] * u28 + uik[8] * u29 + uik[9] * u30 + uik[10] * u31 + uik[11] * u32 + uik[12] * u33 + uik[13] * u34; 3231b3064deSBarry Smith wp[30] += uik[14] * u28 + uik[15] * u29 + uik[16] * u30 + uik[17] * u31 + uik[18] * u32 + uik[19] * u33 + uik[20] * u34; 3241b3064deSBarry Smith wp[31] += uik[21] * u28 + uik[22] * u29 + uik[23] * u30 + uik[24] * u31 + uik[25] * u32 + uik[26] * u33 + uik[27] * u34; 3251b3064deSBarry Smith wp[32] += uik[28] * u28 + uik[29] * u29 + uik[30] * u30 + uik[31] * u31 + uik[32] * u32 + uik[33] * u33 + uik[34] * u34; 3261b3064deSBarry Smith wp[33] += uik[35] * u28 + uik[36] * u29 + uik[37] * u30 + uik[38] * u31 + uik[39] * u32 + uik[40] * u33 + uik[41] * u34; 3271b3064deSBarry Smith wp[34] += uik[42] * u28 + uik[43] * u29 + uik[44] * u30 + uik[45] * u31 + uik[46] * u32 + uik[47] * u33 + uik[48] * u34; 32881278733SSatish Balay 3291b3064deSBarry Smith wp[35] += uik[0] * u35 + uik[1] * u36 + uik[2] * u37 + uik[3] * u38 + uik[4] * u39 + uik[5] * u40 + uik[6] * u41; 3301b3064deSBarry Smith wp[36] += uik[7] * u35 + uik[8] * u36 + uik[9] * u37 + uik[10] * u38 + uik[11] * u39 + uik[12] * u40 + uik[13] * u41; 3311b3064deSBarry Smith wp[37] += uik[14] * u35 + uik[15] * u36 + uik[16] * u37 + uik[17] * u38 + uik[18] * u39 + uik[19] * u40 + uik[20] * u41; 3321b3064deSBarry Smith wp[38] += uik[21] * u35 + uik[22] * u36 + uik[23] * u37 + uik[24] * u38 + uik[25] * u39 + uik[26] * u40 + uik[27] * u41; 3331b3064deSBarry Smith wp[39] += uik[28] * u35 + uik[29] * u36 + uik[30] * u37 + uik[31] * u38 + uik[32] * u39 + uik[33] * u40 + uik[34] * u41; 3341b3064deSBarry Smith wp[40] += uik[35] * u35 + uik[36] * u36 + uik[37] * u37 + uik[38] * u38 + uik[39] * u39 + uik[40] * u40 + uik[41] * u41; 3351b3064deSBarry Smith wp[41] += uik[42] * u35 + uik[43] * u36 + uik[44] * u37 + uik[45] * u38 + uik[46] * u39 + uik[47] * u40 + uik[48] * u41; 3361b3064deSBarry Smith 3371b3064deSBarry Smith wp[42] += uik[0] * u42 + uik[1] * u43 + uik[2] * u44 + uik[3] * u45 + uik[4] * u46 + uik[5] * u47 + uik[6] * u48; 3381b3064deSBarry Smith wp[43] += uik[7] * u42 + uik[8] * u43 + uik[9] * u44 + uik[10] * u45 + uik[11] * u46 + uik[12] * u47 + uik[13] * u48; 3391b3064deSBarry Smith wp[44] += uik[14] * u42 + uik[15] * u43 + uik[16] * u44 + uik[17] * u45 + uik[18] * u46 + uik[19] * u47 + uik[20] * u48; 3401b3064deSBarry Smith wp[45] += uik[21] * u42 + uik[22] * u43 + uik[23] * u44 + uik[24] * u45 + uik[25] * u46 + uik[26] * u47 + uik[27] * u48; 3411b3064deSBarry Smith wp[46] += uik[28] * u42 + uik[29] * u43 + uik[30] * u44 + uik[31] * u45 + uik[32] * u46 + uik[33] * u47 + uik[34] * u48; 3421b3064deSBarry Smith wp[47] += uik[35] * u42 + uik[36] * u43 + uik[37] * u44 + uik[38] * u45 + uik[39] * u46 + uik[40] * u47 + uik[41] * u48; 3431b3064deSBarry Smith wp[48] += uik[42] * u42 + uik[43] * u43 + uik[44] * u44 + uik[45] * u45 + uik[46] * u46 + uik[47] * u47 + uik[48] * u48; 34481278733SSatish Balay } 3459566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * 343.0 * (jmax - jmin))); 34681278733SSatish Balay 34781278733SSatish Balay /* ... add i to row list for next nonzero entry */ 34881278733SSatish Balay il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 34981278733SSatish Balay j = bj[jmin]; 3509371c9d4SSatish Balay jl[i] = jl[j]; 3519371c9d4SSatish Balay jl[j] = i; /* update jl */ 35281278733SSatish Balay } 35381278733SSatish Balay i = nexti; 35481278733SSatish Balay } 35581278733SSatish Balay 35681278733SSatish Balay /* save nonzero entries in k-th row of U ... */ 35781278733SSatish Balay 35881278733SSatish Balay /* invert diagonal block */ 35981278733SSatish Balay d = ba + k * 49; 3609566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(d, dk, 49)); 3619566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_7(d, shift, allowzeropivot, &zeropivotdetected)); 3627b6c816cSBarry Smith if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 36381278733SSatish Balay 3649371c9d4SSatish Balay jmin = bi[k]; 3659371c9d4SSatish Balay jmax = bi[k + 1]; 36681278733SSatish Balay if (jmin < jmax) { 36781278733SSatish Balay for (j = jmin; j < jmax; j++) { 36881278733SSatish Balay vj = bj[j]; /* block col. index of U */ 36981278733SSatish Balay u = ba + j * 49; 37081278733SSatish Balay wp = w + vj * 49; 37181278733SSatish Balay for (k1 = 0; k1 < 49; k1++) { 37281278733SSatish Balay *u++ = *wp; 37381278733SSatish Balay *wp++ = 0.0; 37481278733SSatish Balay } 37581278733SSatish Balay } 37681278733SSatish Balay 37781278733SSatish Balay /* ... add k to row list for first nonzero entry in k-th row */ 37881278733SSatish Balay il[k] = jmin; 37981278733SSatish Balay i = bj[jmin]; 3809371c9d4SSatish Balay jl[k] = jl[i]; 3819371c9d4SSatish Balay jl[i] = k; 38281278733SSatish Balay } 38381278733SSatish Balay } 38481278733SSatish Balay 3859566063dSJacob Faibussowitsch PetscCall(PetscFree(w)); 3869566063dSJacob Faibussowitsch PetscCall(PetscFree2(il, jl)); 3879566063dSJacob Faibussowitsch PetscCall(PetscFree2(dk, uik)); 38881278733SSatish Balay 3894f79d315SHong Zhang C->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering_inplace; 3904f79d315SHong Zhang C->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering_inplace; 3914f79d315SHong Zhang C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering_inplace; 3924f79d315SHong Zhang C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering_inplace; 39381278733SSatish Balay C->assembled = PETSC_TRUE; 39481278733SSatish Balay C->preallocated = PETSC_TRUE; 39526fbe8dcSKarl Rupp 3969566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.3333 * 343 * b->mbs)); /* from inverting diagonal blocks */ 397*3ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 39827e0cc20SSatish Balay } 399