1be1d678aSKris Buschelman 283287d42SBarry Smith /* 383287d42SBarry Smith Factorization code for BAIJ format. 483287d42SBarry Smith */ 5c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> 6af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h> 783287d42SBarry Smith /* 883287d42SBarry Smith Version for when blocks are 7 by 7 983287d42SBarry Smith */ 10*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7_inplace(Mat C, Mat A, const MatFactorInfo *info) 11*d71ae5a4SJacob Faibussowitsch { 1283287d42SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data; 1383287d42SBarry Smith IS isrow = b->row, isicol = b->icol; 145d0c19d7SBarry Smith const PetscInt *r, *ic, *bi = b->i, *bj = b->j, *ajtmp, *diag_offset = b->diag, *ai = a->i, *aj = a->j, *pj, *ajtmpold; 155d0c19d7SBarry Smith PetscInt i, j, n = a->mbs, nz, row, idx; 1683287d42SBarry Smith MatScalar *pv, *v, *rtmp, *pc, *w, *x; 1783287d42SBarry Smith MatScalar p1, p2, p3, p4, m1, m2, m3, m4, m5, m6, m7, m8, m9, x1, x2, x3, x4; 1883287d42SBarry Smith MatScalar p5, p6, p7, p8, p9, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16; 1983287d42SBarry Smith MatScalar x17, x18, x19, x20, x21, x22, x23, x24, x25, p10, p11, p12, p13, p14; 2083287d42SBarry Smith MatScalar p15, p16, p17, p18, p19, p20, p21, p22, p23, p24, p25, m10, m11, m12; 2183287d42SBarry Smith MatScalar m13, m14, m15, m16, m17, m18, m19, m20, m21, m22, m23, m24, m25; 2283287d42SBarry Smith MatScalar p26, p27, p28, p29, p30, p31, p32, p33, p34, p35, p36; 2383287d42SBarry Smith MatScalar p37, p38, p39, p40, p41, p42, p43, p44, p45, p46, p47, p48, p49; 2483287d42SBarry Smith MatScalar x26, x27, x28, x29, x30, x31, x32, x33, x34, x35, x36; 2583287d42SBarry Smith MatScalar x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47, x48, x49; 2683287d42SBarry Smith MatScalar m26, m27, m28, m29, m30, m31, m32, m33, m34, m35, m36; 2783287d42SBarry Smith MatScalar m37, m38, m39, m40, m41, m42, m43, m44, m45, m46, m47, m48, m49; 2883287d42SBarry Smith MatScalar *ba = b->a, *aa = a->a; 29182b8fbaSHong Zhang PetscReal shift = info->shiftamount; 30a455e926SHong Zhang PetscBool allowzeropivot, zeropivotdetected; 3183287d42SBarry Smith 3283287d42SBarry Smith PetscFunctionBegin; 330164db54SHong Zhang allowzeropivot = PetscNot(A->erroriffailure); 349566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 359566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 369566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(49 * (n + 1), &rtmp)); 3783287d42SBarry Smith 3883287d42SBarry Smith for (i = 0; i < n; i++) { 3983287d42SBarry Smith nz = bi[i + 1] - bi[i]; 4083287d42SBarry Smith ajtmp = bj + bi[i]; 4183287d42SBarry Smith for (j = 0; j < nz; j++) { 4283287d42SBarry Smith x = rtmp + 49 * ajtmp[j]; 4383287d42SBarry Smith x[0] = x[1] = x[2] = x[3] = x[4] = x[5] = x[6] = x[7] = x[8] = x[9] = 0.0; 4483287d42SBarry Smith x[10] = x[11] = x[12] = x[13] = x[14] = x[15] = x[16] = x[17] = 0.0; 4583287d42SBarry Smith x[18] = x[19] = x[20] = x[21] = x[22] = x[23] = x[24] = x[25] = 0.0; 4683287d42SBarry Smith x[26] = x[27] = x[28] = x[29] = x[30] = x[31] = x[32] = x[33] = 0.0; 4783287d42SBarry Smith x[34] = x[35] = x[36] = x[37] = x[38] = x[39] = x[40] = x[41] = 0.0; 4883287d42SBarry Smith x[42] = x[43] = x[44] = x[45] = x[46] = x[47] = x[48] = 0.0; 4983287d42SBarry Smith } 5083287d42SBarry Smith /* load in initial (unfactored row) */ 5183287d42SBarry Smith idx = r[i]; 5283287d42SBarry Smith nz = ai[idx + 1] - ai[idx]; 5383287d42SBarry Smith ajtmpold = aj + ai[idx]; 5483287d42SBarry Smith v = aa + 49 * ai[idx]; 5583287d42SBarry Smith for (j = 0; j < nz; j++) { 5683287d42SBarry Smith x = rtmp + 49 * ic[ajtmpold[j]]; 579371c9d4SSatish Balay x[0] = v[0]; 589371c9d4SSatish Balay x[1] = v[1]; 599371c9d4SSatish Balay x[2] = v[2]; 609371c9d4SSatish Balay x[3] = v[3]; 619371c9d4SSatish Balay x[4] = v[4]; 629371c9d4SSatish Balay x[5] = v[5]; 639371c9d4SSatish Balay x[6] = v[6]; 649371c9d4SSatish Balay x[7] = v[7]; 659371c9d4SSatish Balay x[8] = v[8]; 669371c9d4SSatish Balay x[9] = v[9]; 679371c9d4SSatish Balay x[10] = v[10]; 689371c9d4SSatish Balay x[11] = v[11]; 699371c9d4SSatish Balay x[12] = v[12]; 709371c9d4SSatish Balay x[13] = v[13]; 719371c9d4SSatish Balay x[14] = v[14]; 729371c9d4SSatish Balay x[15] = v[15]; 739371c9d4SSatish Balay x[16] = v[16]; 749371c9d4SSatish Balay x[17] = v[17]; 759371c9d4SSatish Balay x[18] = v[18]; 769371c9d4SSatish Balay x[19] = v[19]; 779371c9d4SSatish Balay x[20] = v[20]; 789371c9d4SSatish Balay x[21] = v[21]; 799371c9d4SSatish Balay x[22] = v[22]; 809371c9d4SSatish Balay x[23] = v[23]; 819371c9d4SSatish Balay x[24] = v[24]; 829371c9d4SSatish Balay x[25] = v[25]; 839371c9d4SSatish Balay x[26] = v[26]; 849371c9d4SSatish Balay x[27] = v[27]; 859371c9d4SSatish Balay x[28] = v[28]; 869371c9d4SSatish Balay x[29] = v[29]; 879371c9d4SSatish Balay x[30] = v[30]; 889371c9d4SSatish Balay x[31] = v[31]; 899371c9d4SSatish Balay x[32] = v[32]; 909371c9d4SSatish Balay x[33] = v[33]; 919371c9d4SSatish Balay x[34] = v[34]; 929371c9d4SSatish Balay x[35] = v[35]; 939371c9d4SSatish Balay x[36] = v[36]; 949371c9d4SSatish Balay x[37] = v[37]; 959371c9d4SSatish Balay x[38] = v[38]; 969371c9d4SSatish Balay x[39] = v[39]; 979371c9d4SSatish Balay x[40] = v[40]; 989371c9d4SSatish Balay x[41] = v[41]; 999371c9d4SSatish Balay x[42] = v[42]; 1009371c9d4SSatish Balay x[43] = v[43]; 1019371c9d4SSatish Balay x[44] = v[44]; 1029371c9d4SSatish Balay x[45] = v[45]; 1039371c9d4SSatish Balay x[46] = v[46]; 1049371c9d4SSatish Balay x[47] = v[47]; 10583287d42SBarry Smith x[48] = v[48]; 10683287d42SBarry Smith v += 49; 10783287d42SBarry Smith } 10883287d42SBarry Smith row = *ajtmp++; 10983287d42SBarry Smith while (row < i) { 11083287d42SBarry Smith pc = rtmp + 49 * row; 1119371c9d4SSatish Balay p1 = pc[0]; 1129371c9d4SSatish Balay p2 = pc[1]; 1139371c9d4SSatish Balay p3 = pc[2]; 1149371c9d4SSatish Balay p4 = pc[3]; 1159371c9d4SSatish Balay p5 = pc[4]; 1169371c9d4SSatish Balay p6 = pc[5]; 1179371c9d4SSatish Balay p7 = pc[6]; 1189371c9d4SSatish Balay p8 = pc[7]; 1199371c9d4SSatish Balay p9 = pc[8]; 1209371c9d4SSatish Balay p10 = pc[9]; 1219371c9d4SSatish Balay p11 = pc[10]; 1229371c9d4SSatish Balay p12 = pc[11]; 1239371c9d4SSatish Balay p13 = pc[12]; 1249371c9d4SSatish Balay p14 = pc[13]; 1259371c9d4SSatish Balay p15 = pc[14]; 1269371c9d4SSatish Balay p16 = pc[15]; 1279371c9d4SSatish Balay p17 = pc[16]; 1289371c9d4SSatish Balay p18 = pc[17]; 1299371c9d4SSatish Balay p19 = pc[18]; 1309371c9d4SSatish Balay p20 = pc[19]; 1319371c9d4SSatish Balay p21 = pc[20]; 1329371c9d4SSatish Balay p22 = pc[21]; 1339371c9d4SSatish Balay p23 = pc[22]; 1349371c9d4SSatish Balay p24 = pc[23]; 1359371c9d4SSatish Balay p25 = pc[24]; 1369371c9d4SSatish Balay p26 = pc[25]; 1379371c9d4SSatish Balay p27 = pc[26]; 1389371c9d4SSatish Balay p28 = pc[27]; 1399371c9d4SSatish Balay p29 = pc[28]; 1409371c9d4SSatish Balay p30 = pc[29]; 1419371c9d4SSatish Balay p31 = pc[30]; 1429371c9d4SSatish Balay p32 = pc[31]; 1439371c9d4SSatish Balay p33 = pc[32]; 1449371c9d4SSatish Balay p34 = pc[33]; 1459371c9d4SSatish Balay p35 = pc[34]; 1469371c9d4SSatish Balay p36 = pc[35]; 1479371c9d4SSatish Balay p37 = pc[36]; 1489371c9d4SSatish Balay p38 = pc[37]; 1499371c9d4SSatish Balay p39 = pc[38]; 1509371c9d4SSatish Balay p40 = pc[39]; 1519371c9d4SSatish Balay p41 = pc[40]; 1529371c9d4SSatish Balay p42 = pc[41]; 1539371c9d4SSatish Balay p43 = pc[42]; 1549371c9d4SSatish Balay p44 = pc[43]; 1559371c9d4SSatish Balay p45 = pc[44]; 1569371c9d4SSatish Balay p46 = pc[45]; 1579371c9d4SSatish Balay p47 = pc[46]; 1589371c9d4SSatish Balay p48 = pc[47]; 15983287d42SBarry Smith p49 = pc[48]; 1609371c9d4SSatish Balay if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0 || p5 != 0.0 || p6 != 0.0 || p7 != 0.0 || p8 != 0.0 || p9 != 0.0 || p10 != 0.0 || p11 != 0.0 || p12 != 0.0 || p13 != 0.0 || p14 != 0.0 || p15 != 0.0 || p16 != 0.0 || p17 != 0.0 || p18 != 0.0 || p19 != 0.0 || p20 != 0.0 || p21 != 0.0 || p22 != 0.0 || p23 != 0.0 || p24 != 0.0 || p25 != 0.0 || p26 != 0.0 || p27 != 0.0 || p28 != 0.0 || p29 != 0.0 || p30 != 0.0 || p31 != 0.0 || p32 != 0.0 || p33 != 0.0 || p34 != 0.0 || p35 != 0.0 || p36 != 0.0 || p37 != 0.0 || p38 != 0.0 || p39 != 0.0 || p40 != 0.0 || p41 != 0.0 || p42 != 0.0 || p43 != 0.0 || p44 != 0.0 || p45 != 0.0 || p46 != 0.0 || p47 != 0.0 || p48 != 0.0 || p49 != 0.0) { 16183287d42SBarry Smith pv = ba + 49 * diag_offset[row]; 16283287d42SBarry Smith pj = bj + diag_offset[row] + 1; 1639371c9d4SSatish Balay x1 = pv[0]; 1649371c9d4SSatish Balay x2 = pv[1]; 1659371c9d4SSatish Balay x3 = pv[2]; 1669371c9d4SSatish Balay x4 = pv[3]; 1679371c9d4SSatish Balay x5 = pv[4]; 1689371c9d4SSatish Balay x6 = pv[5]; 1699371c9d4SSatish Balay x7 = pv[6]; 1709371c9d4SSatish Balay x8 = pv[7]; 1719371c9d4SSatish Balay x9 = pv[8]; 1729371c9d4SSatish Balay x10 = pv[9]; 1739371c9d4SSatish Balay x11 = pv[10]; 1749371c9d4SSatish Balay x12 = pv[11]; 1759371c9d4SSatish Balay x13 = pv[12]; 1769371c9d4SSatish Balay x14 = pv[13]; 1779371c9d4SSatish Balay x15 = pv[14]; 1789371c9d4SSatish Balay x16 = pv[15]; 1799371c9d4SSatish Balay x17 = pv[16]; 1809371c9d4SSatish Balay x18 = pv[17]; 1819371c9d4SSatish Balay x19 = pv[18]; 1829371c9d4SSatish Balay x20 = pv[19]; 1839371c9d4SSatish Balay x21 = pv[20]; 1849371c9d4SSatish Balay x22 = pv[21]; 1859371c9d4SSatish Balay x23 = pv[22]; 1869371c9d4SSatish Balay x24 = pv[23]; 1879371c9d4SSatish Balay x25 = pv[24]; 1889371c9d4SSatish Balay x26 = pv[25]; 1899371c9d4SSatish Balay x27 = pv[26]; 1909371c9d4SSatish Balay x28 = pv[27]; 1919371c9d4SSatish Balay x29 = pv[28]; 1929371c9d4SSatish Balay x30 = pv[29]; 1939371c9d4SSatish Balay x31 = pv[30]; 1949371c9d4SSatish Balay x32 = pv[31]; 1959371c9d4SSatish Balay x33 = pv[32]; 1969371c9d4SSatish Balay x34 = pv[33]; 1979371c9d4SSatish Balay x35 = pv[34]; 1989371c9d4SSatish Balay x36 = pv[35]; 1999371c9d4SSatish Balay x37 = pv[36]; 2009371c9d4SSatish Balay x38 = pv[37]; 2019371c9d4SSatish Balay x39 = pv[38]; 2029371c9d4SSatish Balay x40 = pv[39]; 2039371c9d4SSatish Balay x41 = pv[40]; 2049371c9d4SSatish Balay x42 = pv[41]; 2059371c9d4SSatish Balay x43 = pv[42]; 2069371c9d4SSatish Balay x44 = pv[43]; 2079371c9d4SSatish Balay x45 = pv[44]; 2089371c9d4SSatish Balay x46 = pv[45]; 2099371c9d4SSatish Balay x47 = pv[46]; 2109371c9d4SSatish Balay x48 = pv[47]; 21183287d42SBarry Smith x49 = pv[48]; 21283287d42SBarry Smith pc[0] = m1 = p1 * x1 + p8 * x2 + p15 * x3 + p22 * x4 + p29 * x5 + p36 * x6 + p43 * x7; 21383287d42SBarry Smith pc[1] = m2 = p2 * x1 + p9 * x2 + p16 * x3 + p23 * x4 + p30 * x5 + p37 * x6 + p44 * x7; 21483287d42SBarry Smith pc[2] = m3 = p3 * x1 + p10 * x2 + p17 * x3 + p24 * x4 + p31 * x5 + p38 * x6 + p45 * x7; 21583287d42SBarry Smith pc[3] = m4 = p4 * x1 + p11 * x2 + p18 * x3 + p25 * x4 + p32 * x5 + p39 * x6 + p46 * x7; 21683287d42SBarry Smith pc[4] = m5 = p5 * x1 + p12 * x2 + p19 * x3 + p26 * x4 + p33 * x5 + p40 * x6 + p47 * x7; 21783287d42SBarry Smith pc[5] = m6 = p6 * x1 + p13 * x2 + p20 * x3 + p27 * x4 + p34 * x5 + p41 * x6 + p48 * x7; 21883287d42SBarry Smith pc[6] = m7 = p7 * x1 + p14 * x2 + p21 * x3 + p28 * x4 + p35 * x5 + p42 * x6 + p49 * x7; 21983287d42SBarry Smith 22083287d42SBarry Smith pc[7] = m8 = p1 * x8 + p8 * x9 + p15 * x10 + p22 * x11 + p29 * x12 + p36 * x13 + p43 * x14; 22183287d42SBarry Smith pc[8] = m9 = p2 * x8 + p9 * x9 + p16 * x10 + p23 * x11 + p30 * x12 + p37 * x13 + p44 * x14; 22283287d42SBarry Smith pc[9] = m10 = p3 * x8 + p10 * x9 + p17 * x10 + p24 * x11 + p31 * x12 + p38 * x13 + p45 * x14; 22383287d42SBarry Smith pc[10] = m11 = p4 * x8 + p11 * x9 + p18 * x10 + p25 * x11 + p32 * x12 + p39 * x13 + p46 * x14; 22483287d42SBarry Smith pc[11] = m12 = p5 * x8 + p12 * x9 + p19 * x10 + p26 * x11 + p33 * x12 + p40 * x13 + p47 * x14; 22583287d42SBarry Smith pc[12] = m13 = p6 * x8 + p13 * x9 + p20 * x10 + p27 * x11 + p34 * x12 + p41 * x13 + p48 * x14; 22683287d42SBarry Smith pc[13] = m14 = p7 * x8 + p14 * x9 + p21 * x10 + p28 * x11 + p35 * x12 + p42 * x13 + p49 * x14; 22783287d42SBarry Smith 22883287d42SBarry Smith pc[14] = m15 = p1 * x15 + p8 * x16 + p15 * x17 + p22 * x18 + p29 * x19 + p36 * x20 + p43 * x21; 22983287d42SBarry Smith pc[15] = m16 = p2 * x15 + p9 * x16 + p16 * x17 + p23 * x18 + p30 * x19 + p37 * x20 + p44 * x21; 23083287d42SBarry Smith pc[16] = m17 = p3 * x15 + p10 * x16 + p17 * x17 + p24 * x18 + p31 * x19 + p38 * x20 + p45 * x21; 23183287d42SBarry Smith pc[17] = m18 = p4 * x15 + p11 * x16 + p18 * x17 + p25 * x18 + p32 * x19 + p39 * x20 + p46 * x21; 23283287d42SBarry Smith pc[18] = m19 = p5 * x15 + p12 * x16 + p19 * x17 + p26 * x18 + p33 * x19 + p40 * x20 + p47 * x21; 23383287d42SBarry Smith pc[19] = m20 = p6 * x15 + p13 * x16 + p20 * x17 + p27 * x18 + p34 * x19 + p41 * x20 + p48 * x21; 23483287d42SBarry Smith pc[20] = m21 = p7 * x15 + p14 * x16 + p21 * x17 + p28 * x18 + p35 * x19 + p42 * x20 + p49 * x21; 23583287d42SBarry Smith 23683287d42SBarry Smith pc[21] = m22 = p1 * x22 + p8 * x23 + p15 * x24 + p22 * x25 + p29 * x26 + p36 * x27 + p43 * x28; 23783287d42SBarry Smith pc[22] = m23 = p2 * x22 + p9 * x23 + p16 * x24 + p23 * x25 + p30 * x26 + p37 * x27 + p44 * x28; 23883287d42SBarry Smith pc[23] = m24 = p3 * x22 + p10 * x23 + p17 * x24 + p24 * x25 + p31 * x26 + p38 * x27 + p45 * x28; 23983287d42SBarry Smith pc[24] = m25 = p4 * x22 + p11 * x23 + p18 * x24 + p25 * x25 + p32 * x26 + p39 * x27 + p46 * x28; 24083287d42SBarry Smith pc[25] = m26 = p5 * x22 + p12 * x23 + p19 * x24 + p26 * x25 + p33 * x26 + p40 * x27 + p47 * x28; 24183287d42SBarry Smith pc[26] = m27 = p6 * x22 + p13 * x23 + p20 * x24 + p27 * x25 + p34 * x26 + p41 * x27 + p48 * x28; 24283287d42SBarry Smith pc[27] = m28 = p7 * x22 + p14 * x23 + p21 * x24 + p28 * x25 + p35 * x26 + p42 * x27 + p49 * x28; 24383287d42SBarry Smith 24483287d42SBarry Smith pc[28] = m29 = p1 * x29 + p8 * x30 + p15 * x31 + p22 * x32 + p29 * x33 + p36 * x34 + p43 * x35; 24583287d42SBarry Smith pc[29] = m30 = p2 * x29 + p9 * x30 + p16 * x31 + p23 * x32 + p30 * x33 + p37 * x34 + p44 * x35; 24683287d42SBarry Smith pc[30] = m31 = p3 * x29 + p10 * x30 + p17 * x31 + p24 * x32 + p31 * x33 + p38 * x34 + p45 * x35; 24783287d42SBarry Smith pc[31] = m32 = p4 * x29 + p11 * x30 + p18 * x31 + p25 * x32 + p32 * x33 + p39 * x34 + p46 * x35; 24883287d42SBarry Smith pc[32] = m33 = p5 * x29 + p12 * x30 + p19 * x31 + p26 * x32 + p33 * x33 + p40 * x34 + p47 * x35; 24983287d42SBarry Smith pc[33] = m34 = p6 * x29 + p13 * x30 + p20 * x31 + p27 * x32 + p34 * x33 + p41 * x34 + p48 * x35; 25083287d42SBarry Smith pc[34] = m35 = p7 * x29 + p14 * x30 + p21 * x31 + p28 * x32 + p35 * x33 + p42 * x34 + p49 * x35; 25183287d42SBarry Smith 25283287d42SBarry Smith pc[35] = m36 = p1 * x36 + p8 * x37 + p15 * x38 + p22 * x39 + p29 * x40 + p36 * x41 + p43 * x42; 25383287d42SBarry Smith pc[36] = m37 = p2 * x36 + p9 * x37 + p16 * x38 + p23 * x39 + p30 * x40 + p37 * x41 + p44 * x42; 25483287d42SBarry Smith pc[37] = m38 = p3 * x36 + p10 * x37 + p17 * x38 + p24 * x39 + p31 * x40 + p38 * x41 + p45 * x42; 25583287d42SBarry Smith pc[38] = m39 = p4 * x36 + p11 * x37 + p18 * x38 + p25 * x39 + p32 * x40 + p39 * x41 + p46 * x42; 25683287d42SBarry Smith pc[39] = m40 = p5 * x36 + p12 * x37 + p19 * x38 + p26 * x39 + p33 * x40 + p40 * x41 + p47 * x42; 25783287d42SBarry Smith pc[40] = m41 = p6 * x36 + p13 * x37 + p20 * x38 + p27 * x39 + p34 * x40 + p41 * x41 + p48 * x42; 25883287d42SBarry Smith pc[41] = m42 = p7 * x36 + p14 * x37 + p21 * x38 + p28 * x39 + p35 * x40 + p42 * x41 + p49 * x42; 25983287d42SBarry Smith 26083287d42SBarry Smith pc[42] = m43 = p1 * x43 + p8 * x44 + p15 * x45 + p22 * x46 + p29 * x47 + p36 * x48 + p43 * x49; 26183287d42SBarry Smith pc[43] = m44 = p2 * x43 + p9 * x44 + p16 * x45 + p23 * x46 + p30 * x47 + p37 * x48 + p44 * x49; 26283287d42SBarry Smith pc[44] = m45 = p3 * x43 + p10 * x44 + p17 * x45 + p24 * x46 + p31 * x47 + p38 * x48 + p45 * x49; 26383287d42SBarry Smith pc[45] = m46 = p4 * x43 + p11 * x44 + p18 * x45 + p25 * x46 + p32 * x47 + p39 * x48 + p46 * x49; 26483287d42SBarry Smith pc[46] = m47 = p5 * x43 + p12 * x44 + p19 * x45 + p26 * x46 + p33 * x47 + p40 * x48 + p47 * x49; 26583287d42SBarry Smith pc[47] = m48 = p6 * x43 + p13 * x44 + p20 * x45 + p27 * x46 + p34 * x47 + p41 * x48 + p48 * x49; 26683287d42SBarry Smith pc[48] = m49 = p7 * x43 + p14 * x44 + p21 * x45 + p28 * x46 + p35 * x47 + p42 * x48 + p49 * x49; 26783287d42SBarry Smith 26883287d42SBarry Smith nz = bi[row + 1] - diag_offset[row] - 1; 26983287d42SBarry Smith pv += 49; 27083287d42SBarry Smith for (j = 0; j < nz; j++) { 2719371c9d4SSatish Balay x1 = pv[0]; 2729371c9d4SSatish Balay x2 = pv[1]; 2739371c9d4SSatish Balay x3 = pv[2]; 2749371c9d4SSatish Balay x4 = pv[3]; 2759371c9d4SSatish Balay x5 = pv[4]; 2769371c9d4SSatish Balay x6 = pv[5]; 2779371c9d4SSatish Balay x7 = pv[6]; 2789371c9d4SSatish Balay x8 = pv[7]; 2799371c9d4SSatish Balay x9 = pv[8]; 2809371c9d4SSatish Balay x10 = pv[9]; 2819371c9d4SSatish Balay x11 = pv[10]; 2829371c9d4SSatish Balay x12 = pv[11]; 2839371c9d4SSatish Balay x13 = pv[12]; 2849371c9d4SSatish Balay x14 = pv[13]; 2859371c9d4SSatish Balay x15 = pv[14]; 2869371c9d4SSatish Balay x16 = pv[15]; 2879371c9d4SSatish Balay x17 = pv[16]; 2889371c9d4SSatish Balay x18 = pv[17]; 2899371c9d4SSatish Balay x19 = pv[18]; 2909371c9d4SSatish Balay x20 = pv[19]; 2919371c9d4SSatish Balay x21 = pv[20]; 2929371c9d4SSatish Balay x22 = pv[21]; 2939371c9d4SSatish Balay x23 = pv[22]; 2949371c9d4SSatish Balay x24 = pv[23]; 2959371c9d4SSatish Balay x25 = pv[24]; 2969371c9d4SSatish Balay x26 = pv[25]; 2979371c9d4SSatish Balay x27 = pv[26]; 2989371c9d4SSatish Balay x28 = pv[27]; 2999371c9d4SSatish Balay x29 = pv[28]; 3009371c9d4SSatish Balay x30 = pv[29]; 3019371c9d4SSatish Balay x31 = pv[30]; 3029371c9d4SSatish Balay x32 = pv[31]; 3039371c9d4SSatish Balay x33 = pv[32]; 3049371c9d4SSatish Balay x34 = pv[33]; 3059371c9d4SSatish Balay x35 = pv[34]; 3069371c9d4SSatish Balay x36 = pv[35]; 3079371c9d4SSatish Balay x37 = pv[36]; 3089371c9d4SSatish Balay x38 = pv[37]; 3099371c9d4SSatish Balay x39 = pv[38]; 3109371c9d4SSatish Balay x40 = pv[39]; 3119371c9d4SSatish Balay x41 = pv[40]; 3129371c9d4SSatish Balay x42 = pv[41]; 3139371c9d4SSatish Balay x43 = pv[42]; 3149371c9d4SSatish Balay x44 = pv[43]; 3159371c9d4SSatish Balay x45 = pv[44]; 3169371c9d4SSatish Balay x46 = pv[45]; 3179371c9d4SSatish Balay x47 = pv[46]; 3189371c9d4SSatish Balay x48 = pv[47]; 31983287d42SBarry Smith x49 = pv[48]; 32083287d42SBarry Smith x = rtmp + 49 * pj[j]; 32183287d42SBarry Smith x[0] -= m1 * x1 + m8 * x2 + m15 * x3 + m22 * x4 + m29 * x5 + m36 * x6 + m43 * x7; 32283287d42SBarry Smith x[1] -= m2 * x1 + m9 * x2 + m16 * x3 + m23 * x4 + m30 * x5 + m37 * x6 + m44 * x7; 32383287d42SBarry Smith x[2] -= m3 * x1 + m10 * x2 + m17 * x3 + m24 * x4 + m31 * x5 + m38 * x6 + m45 * x7; 32483287d42SBarry Smith x[3] -= m4 * x1 + m11 * x2 + m18 * x3 + m25 * x4 + m32 * x5 + m39 * x6 + m46 * x7; 32583287d42SBarry Smith x[4] -= m5 * x1 + m12 * x2 + m19 * x3 + m26 * x4 + m33 * x5 + m40 * x6 + m47 * x7; 32683287d42SBarry Smith x[5] -= m6 * x1 + m13 * x2 + m20 * x3 + m27 * x4 + m34 * x5 + m41 * x6 + m48 * x7; 32783287d42SBarry Smith x[6] -= m7 * x1 + m14 * x2 + m21 * x3 + m28 * x4 + m35 * x5 + m42 * x6 + m49 * x7; 32883287d42SBarry Smith 32983287d42SBarry Smith x[7] -= m1 * x8 + m8 * x9 + m15 * x10 + m22 * x11 + m29 * x12 + m36 * x13 + m43 * x14; 33083287d42SBarry Smith x[8] -= m2 * x8 + m9 * x9 + m16 * x10 + m23 * x11 + m30 * x12 + m37 * x13 + m44 * x14; 33183287d42SBarry Smith x[9] -= m3 * x8 + m10 * x9 + m17 * x10 + m24 * x11 + m31 * x12 + m38 * x13 + m45 * x14; 33283287d42SBarry Smith x[10] -= m4 * x8 + m11 * x9 + m18 * x10 + m25 * x11 + m32 * x12 + m39 * x13 + m46 * x14; 33383287d42SBarry Smith x[11] -= m5 * x8 + m12 * x9 + m19 * x10 + m26 * x11 + m33 * x12 + m40 * x13 + m47 * x14; 33483287d42SBarry Smith x[12] -= m6 * x8 + m13 * x9 + m20 * x10 + m27 * x11 + m34 * x12 + m41 * x13 + m48 * x14; 33583287d42SBarry Smith x[13] -= m7 * x8 + m14 * x9 + m21 * x10 + m28 * x11 + m35 * x12 + m42 * x13 + m49 * x14; 33683287d42SBarry Smith 33783287d42SBarry Smith x[14] -= m1 * x15 + m8 * x16 + m15 * x17 + m22 * x18 + m29 * x19 + m36 * x20 + m43 * x21; 33883287d42SBarry Smith x[15] -= m2 * x15 + m9 * x16 + m16 * x17 + m23 * x18 + m30 * x19 + m37 * x20 + m44 * x21; 33983287d42SBarry Smith x[16] -= m3 * x15 + m10 * x16 + m17 * x17 + m24 * x18 + m31 * x19 + m38 * x20 + m45 * x21; 34083287d42SBarry Smith x[17] -= m4 * x15 + m11 * x16 + m18 * x17 + m25 * x18 + m32 * x19 + m39 * x20 + m46 * x21; 34183287d42SBarry Smith x[18] -= m5 * x15 + m12 * x16 + m19 * x17 + m26 * x18 + m33 * x19 + m40 * x20 + m47 * x21; 34283287d42SBarry Smith x[19] -= m6 * x15 + m13 * x16 + m20 * x17 + m27 * x18 + m34 * x19 + m41 * x20 + m48 * x21; 34383287d42SBarry Smith x[20] -= m7 * x15 + m14 * x16 + m21 * x17 + m28 * x18 + m35 * x19 + m42 * x20 + m49 * x21; 34483287d42SBarry Smith 34583287d42SBarry Smith x[21] -= m1 * x22 + m8 * x23 + m15 * x24 + m22 * x25 + m29 * x26 + m36 * x27 + m43 * x28; 34683287d42SBarry Smith x[22] -= m2 * x22 + m9 * x23 + m16 * x24 + m23 * x25 + m30 * x26 + m37 * x27 + m44 * x28; 34783287d42SBarry Smith x[23] -= m3 * x22 + m10 * x23 + m17 * x24 + m24 * x25 + m31 * x26 + m38 * x27 + m45 * x28; 34883287d42SBarry Smith x[24] -= m4 * x22 + m11 * x23 + m18 * x24 + m25 * x25 + m32 * x26 + m39 * x27 + m46 * x28; 34983287d42SBarry Smith x[25] -= m5 * x22 + m12 * x23 + m19 * x24 + m26 * x25 + m33 * x26 + m40 * x27 + m47 * x28; 35083287d42SBarry Smith x[26] -= m6 * x22 + m13 * x23 + m20 * x24 + m27 * x25 + m34 * x26 + m41 * x27 + m48 * x28; 35183287d42SBarry Smith x[27] -= m7 * x22 + m14 * x23 + m21 * x24 + m28 * x25 + m35 * x26 + m42 * x27 + m49 * x28; 35283287d42SBarry Smith 35383287d42SBarry Smith x[28] -= m1 * x29 + m8 * x30 + m15 * x31 + m22 * x32 + m29 * x33 + m36 * x34 + m43 * x35; 35483287d42SBarry Smith x[29] -= m2 * x29 + m9 * x30 + m16 * x31 + m23 * x32 + m30 * x33 + m37 * x34 + m44 * x35; 35583287d42SBarry Smith x[30] -= m3 * x29 + m10 * x30 + m17 * x31 + m24 * x32 + m31 * x33 + m38 * x34 + m45 * x35; 35683287d42SBarry Smith x[31] -= m4 * x29 + m11 * x30 + m18 * x31 + m25 * x32 + m32 * x33 + m39 * x34 + m46 * x35; 35783287d42SBarry Smith x[32] -= m5 * x29 + m12 * x30 + m19 * x31 + m26 * x32 + m33 * x33 + m40 * x34 + m47 * x35; 35883287d42SBarry Smith x[33] -= m6 * x29 + m13 * x30 + m20 * x31 + m27 * x32 + m34 * x33 + m41 * x34 + m48 * x35; 35983287d42SBarry Smith x[34] -= m7 * x29 + m14 * x30 + m21 * x31 + m28 * x32 + m35 * x33 + m42 * x34 + m49 * x35; 36083287d42SBarry Smith 36183287d42SBarry Smith x[35] -= m1 * x36 + m8 * x37 + m15 * x38 + m22 * x39 + m29 * x40 + m36 * x41 + m43 * x42; 36283287d42SBarry Smith x[36] -= m2 * x36 + m9 * x37 + m16 * x38 + m23 * x39 + m30 * x40 + m37 * x41 + m44 * x42; 36383287d42SBarry Smith x[37] -= m3 * x36 + m10 * x37 + m17 * x38 + m24 * x39 + m31 * x40 + m38 * x41 + m45 * x42; 36483287d42SBarry Smith x[38] -= m4 * x36 + m11 * x37 + m18 * x38 + m25 * x39 + m32 * x40 + m39 * x41 + m46 * x42; 36583287d42SBarry Smith x[39] -= m5 * x36 + m12 * x37 + m19 * x38 + m26 * x39 + m33 * x40 + m40 * x41 + m47 * x42; 36683287d42SBarry Smith x[40] -= m6 * x36 + m13 * x37 + m20 * x38 + m27 * x39 + m34 * x40 + m41 * x41 + m48 * x42; 36783287d42SBarry Smith x[41] -= m7 * x36 + m14 * x37 + m21 * x38 + m28 * x39 + m35 * x40 + m42 * x41 + m49 * x42; 36883287d42SBarry Smith 36983287d42SBarry Smith x[42] -= m1 * x43 + m8 * x44 + m15 * x45 + m22 * x46 + m29 * x47 + m36 * x48 + m43 * x49; 37083287d42SBarry Smith x[43] -= m2 * x43 + m9 * x44 + m16 * x45 + m23 * x46 + m30 * x47 + m37 * x48 + m44 * x49; 37183287d42SBarry Smith x[44] -= m3 * x43 + m10 * x44 + m17 * x45 + m24 * x46 + m31 * x47 + m38 * x48 + m45 * x49; 37283287d42SBarry Smith x[45] -= m4 * x43 + m11 * x44 + m18 * x45 + m25 * x46 + m32 * x47 + m39 * x48 + m46 * x49; 37383287d42SBarry Smith x[46] -= m5 * x43 + m12 * x44 + m19 * x45 + m26 * x46 + m33 * x47 + m40 * x48 + m47 * x49; 37483287d42SBarry Smith x[47] -= m6 * x43 + m13 * x44 + m20 * x45 + m27 * x46 + m34 * x47 + m41 * x48 + m48 * x49; 37583287d42SBarry Smith x[48] -= m7 * x43 + m14 * x44 + m21 * x45 + m28 * x46 + m35 * x47 + m42 * x48 + m49 * x49; 37683287d42SBarry Smith pv += 49; 37783287d42SBarry Smith } 3789566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(686.0 * nz + 637.0)); 37983287d42SBarry Smith } 38083287d42SBarry Smith row = *ajtmp++; 38183287d42SBarry Smith } 38283287d42SBarry Smith /* finished row so stick it into b->a */ 38383287d42SBarry Smith pv = ba + 49 * bi[i]; 38483287d42SBarry Smith pj = bj + bi[i]; 38583287d42SBarry Smith nz = bi[i + 1] - bi[i]; 38683287d42SBarry Smith for (j = 0; j < nz; j++) { 38783287d42SBarry Smith x = rtmp + 49 * pj[j]; 3889371c9d4SSatish Balay pv[0] = x[0]; 3899371c9d4SSatish Balay pv[1] = x[1]; 3909371c9d4SSatish Balay pv[2] = x[2]; 3919371c9d4SSatish Balay pv[3] = x[3]; 3929371c9d4SSatish Balay pv[4] = x[4]; 3939371c9d4SSatish Balay pv[5] = x[5]; 3949371c9d4SSatish Balay pv[6] = x[6]; 3959371c9d4SSatish Balay pv[7] = x[7]; 3969371c9d4SSatish Balay pv[8] = x[8]; 3979371c9d4SSatish Balay pv[9] = x[9]; 3989371c9d4SSatish Balay pv[10] = x[10]; 3999371c9d4SSatish Balay pv[11] = x[11]; 4009371c9d4SSatish Balay pv[12] = x[12]; 4019371c9d4SSatish Balay pv[13] = x[13]; 4029371c9d4SSatish Balay pv[14] = x[14]; 4039371c9d4SSatish Balay pv[15] = x[15]; 4049371c9d4SSatish Balay pv[16] = x[16]; 4059371c9d4SSatish Balay pv[17] = x[17]; 4069371c9d4SSatish Balay pv[18] = x[18]; 4079371c9d4SSatish Balay pv[19] = x[19]; 4089371c9d4SSatish Balay pv[20] = x[20]; 4099371c9d4SSatish Balay pv[21] = x[21]; 4109371c9d4SSatish Balay pv[22] = x[22]; 4119371c9d4SSatish Balay pv[23] = x[23]; 4129371c9d4SSatish Balay pv[24] = x[24]; 4139371c9d4SSatish Balay pv[25] = x[25]; 4149371c9d4SSatish Balay pv[26] = x[26]; 4159371c9d4SSatish Balay pv[27] = x[27]; 4169371c9d4SSatish Balay pv[28] = x[28]; 4179371c9d4SSatish Balay pv[29] = x[29]; 4189371c9d4SSatish Balay pv[30] = x[30]; 4199371c9d4SSatish Balay pv[31] = x[31]; 4209371c9d4SSatish Balay pv[32] = x[32]; 4219371c9d4SSatish Balay pv[33] = x[33]; 4229371c9d4SSatish Balay pv[34] = x[34]; 4239371c9d4SSatish Balay pv[35] = x[35]; 4249371c9d4SSatish Balay pv[36] = x[36]; 4259371c9d4SSatish Balay pv[37] = x[37]; 4269371c9d4SSatish Balay pv[38] = x[38]; 4279371c9d4SSatish Balay pv[39] = x[39]; 4289371c9d4SSatish Balay pv[40] = x[40]; 4299371c9d4SSatish Balay pv[41] = x[41]; 4309371c9d4SSatish Balay pv[42] = x[42]; 4319371c9d4SSatish Balay pv[43] = x[43]; 4329371c9d4SSatish Balay pv[44] = x[44]; 4339371c9d4SSatish Balay pv[45] = x[45]; 4349371c9d4SSatish Balay pv[46] = x[46]; 4359371c9d4SSatish Balay pv[47] = x[47]; 43683287d42SBarry Smith pv[48] = x[48]; 43783287d42SBarry Smith pv += 49; 43883287d42SBarry Smith } 43983287d42SBarry Smith /* invert diagonal block */ 44083287d42SBarry Smith w = ba + 49 * diag_offset[i]; 4419566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_7(w, shift, allowzeropivot, &zeropivotdetected)); 4427b6c816cSBarry Smith if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 44383287d42SBarry Smith } 44483287d42SBarry Smith 4459566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 4469566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 4479566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 44826fbe8dcSKarl Rupp 44906e38f1dSHong Zhang C->ops->solve = MatSolve_SeqBAIJ_7_inplace; 45006e38f1dSHong Zhang C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7_inplace; 45183287d42SBarry Smith C->assembled = PETSC_TRUE; 45226fbe8dcSKarl Rupp 4539566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.333333333333 * 7 * 7 * 7 * b->mbs)); /* from inverting diagonal blocks */ 45483287d42SBarry Smith PetscFunctionReturn(0); 45583287d42SBarry Smith } 456bef36659SShri Abhyankar 457*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7(Mat B, Mat A, const MatFactorInfo *info) 458*d71ae5a4SJacob Faibussowitsch { 459bef36659SShri Abhyankar Mat C = B; 460bef36659SShri Abhyankar Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data; 461bef36659SShri Abhyankar IS isrow = b->row, isicol = b->icol; 4625a586d82SBarry Smith const PetscInt *r, *ic; 463bbd65245SShri Abhyankar PetscInt i, j, k, nz, nzL, row; 464bbd65245SShri Abhyankar const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 465bbd65245SShri Abhyankar const PetscInt *ajtmp, *bjtmp, *bdiag = b->diag, *pj, bs2 = a->bs2; 466bef36659SShri Abhyankar MatScalar *rtmp, *pc, *mwork, *v, *pv, *aa = a->a; 467bbd65245SShri Abhyankar PetscInt flg; 468182b8fbaSHong Zhang PetscReal shift = info->shiftamount; 469a455e926SHong Zhang PetscBool allowzeropivot, zeropivotdetected; 470bef36659SShri Abhyankar 471bef36659SShri Abhyankar PetscFunctionBegin; 4720164db54SHong Zhang allowzeropivot = PetscNot(A->erroriffailure); 4739566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 4749566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 475bef36659SShri Abhyankar 476bef36659SShri Abhyankar /* generate work space needed by the factorization */ 4779566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs2 * n, &rtmp, bs2, &mwork)); 4789566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(rtmp, bs2 * n)); 479bef36659SShri Abhyankar 480bef36659SShri Abhyankar for (i = 0; i < n; i++) { 481bef36659SShri Abhyankar /* zero rtmp */ 482bef36659SShri Abhyankar /* L part */ 483bef36659SShri Abhyankar nz = bi[i + 1] - bi[i]; 484bef36659SShri Abhyankar bjtmp = bj + bi[i]; 48548a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2)); 486bef36659SShri Abhyankar 487bef36659SShri Abhyankar /* U part */ 48835aa4fcfSShri Abhyankar nz = bdiag[i] - bdiag[i + 1]; 48935aa4fcfSShri Abhyankar bjtmp = bj + bdiag[i + 1] + 1; 49048a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2)); 49135aa4fcfSShri Abhyankar 49235aa4fcfSShri Abhyankar /* load in initial (unfactored row) */ 49335aa4fcfSShri Abhyankar nz = ai[r[i] + 1] - ai[r[i]]; 49435aa4fcfSShri Abhyankar ajtmp = aj + ai[r[i]]; 49535aa4fcfSShri Abhyankar v = aa + bs2 * ai[r[i]]; 49648a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(rtmp + bs2 * ic[ajtmp[j]], v + bs2 * j, bs2)); 49735aa4fcfSShri Abhyankar 49835aa4fcfSShri Abhyankar /* elimination */ 49935aa4fcfSShri Abhyankar bjtmp = bj + bi[i]; 50035aa4fcfSShri Abhyankar nzL = bi[i + 1] - bi[i]; 50135aa4fcfSShri Abhyankar for (k = 0; k < nzL; k++) { 50235aa4fcfSShri Abhyankar row = bjtmp[k]; 50335aa4fcfSShri Abhyankar pc = rtmp + bs2 * row; 504c35f09e5SBarry Smith for (flg = 0, j = 0; j < bs2; j++) { 505c35f09e5SBarry Smith if (pc[j] != 0.0) { 506c35f09e5SBarry Smith flg = 1; 507c35f09e5SBarry Smith break; 508c35f09e5SBarry Smith } 509c35f09e5SBarry Smith } 51035aa4fcfSShri Abhyankar if (flg) { 51135aa4fcfSShri Abhyankar pv = b->a + bs2 * bdiag[row]; 51296b95a6bSBarry Smith /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */ 5139566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_A_times_B_7(pc, pv, mwork)); 51435aa4fcfSShri Abhyankar 515a5b23f4aSJose E. Roman pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 51635aa4fcfSShri Abhyankar pv = b->a + bs2 * (bdiag[row + 1] + 1); 51735aa4fcfSShri Abhyankar nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries inU(row,:), excluding diag */ 51835aa4fcfSShri Abhyankar for (j = 0; j < nz; j++) { 51996b95a6bSBarry Smith /* PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */ 52035aa4fcfSShri Abhyankar /* rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */ 52135aa4fcfSShri Abhyankar v = rtmp + bs2 * pj[j]; 5229566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_A_minus_B_times_C_7(v, pc, pv)); 52335aa4fcfSShri Abhyankar pv += bs2; 52435aa4fcfSShri Abhyankar } 5259566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(686.0 * nz + 637)); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ 52635aa4fcfSShri Abhyankar } 52735aa4fcfSShri Abhyankar } 52835aa4fcfSShri Abhyankar 52935aa4fcfSShri Abhyankar /* finished row so stick it into b->a */ 53035aa4fcfSShri Abhyankar /* L part */ 53135aa4fcfSShri Abhyankar pv = b->a + bs2 * bi[i]; 53235aa4fcfSShri Abhyankar pj = b->j + bi[i]; 53335aa4fcfSShri Abhyankar nz = bi[i + 1] - bi[i]; 53448a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2)); 53535aa4fcfSShri Abhyankar 536a5b23f4aSJose E. Roman /* Mark diagonal and invert diagonal for simpler triangular solves */ 53735aa4fcfSShri Abhyankar pv = b->a + bs2 * bdiag[i]; 53835aa4fcfSShri Abhyankar pj = b->j + bdiag[i]; 5399566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(pv, rtmp + bs2 * pj[0], bs2)); 5409566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_7(pv, shift, allowzeropivot, &zeropivotdetected)); 5417b6c816cSBarry Smith if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 54235aa4fcfSShri Abhyankar 54335aa4fcfSShri Abhyankar /* U part */ 54435aa4fcfSShri Abhyankar pv = b->a + bs2 * (bdiag[i + 1] + 1); 54535aa4fcfSShri Abhyankar pj = b->j + bdiag[i + 1] + 1; 54635aa4fcfSShri Abhyankar nz = bdiag[i] - bdiag[i + 1] - 1; 54748a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2)); 54835aa4fcfSShri Abhyankar } 54935aa4fcfSShri Abhyankar 5509566063dSJacob Faibussowitsch PetscCall(PetscFree2(rtmp, mwork)); 5519566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 5529566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 55326fbe8dcSKarl Rupp 5544dd39f65SShri Abhyankar C->ops->solve = MatSolve_SeqBAIJ_7; 5554dd39f65SShri Abhyankar C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7; 55635aa4fcfSShri Abhyankar C->assembled = PETSC_TRUE; 55726fbe8dcSKarl Rupp 5589566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.333333333333 * 7 * 7 * 7 * n)); /* from inverting diagonal blocks */ 55935aa4fcfSShri Abhyankar PetscFunctionReturn(0); 56035aa4fcfSShri Abhyankar } 56135aa4fcfSShri Abhyankar 562*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info) 563*d71ae5a4SJacob Faibussowitsch { 564bef36659SShri Abhyankar Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data; 565bef36659SShri Abhyankar PetscInt i, j, n = a->mbs, *bi = b->i, *bj = b->j; 566bef36659SShri Abhyankar PetscInt *ajtmpold, *ajtmp, nz, row; 567bef36659SShri Abhyankar PetscInt *diag_offset = b->diag, *ai = a->i, *aj = a->j, *pj; 568bef36659SShri Abhyankar MatScalar *pv, *v, *rtmp, *pc, *w, *x; 569bef36659SShri Abhyankar MatScalar x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; 570bef36659SShri Abhyankar MatScalar x16, x17, x18, x19, x20, x21, x22, x23, x24, x25; 571bef36659SShri Abhyankar MatScalar p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15; 572bef36659SShri Abhyankar MatScalar p16, p17, p18, p19, p20, p21, p22, p23, p24, p25; 573bef36659SShri Abhyankar MatScalar m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14, m15; 574bef36659SShri Abhyankar MatScalar m16, m17, m18, m19, m20, m21, m22, m23, m24, m25; 575bef36659SShri Abhyankar MatScalar p26, p27, p28, p29, p30, p31, p32, p33, p34, p35, p36; 576bef36659SShri Abhyankar MatScalar p37, p38, p39, p40, p41, p42, p43, p44, p45, p46, p47, p48, p49; 577bef36659SShri Abhyankar MatScalar x26, x27, x28, x29, x30, x31, x32, x33, x34, x35, x36; 578bef36659SShri Abhyankar MatScalar x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47, x48, x49; 579bef36659SShri Abhyankar MatScalar m26, m27, m28, m29, m30, m31, m32, m33, m34, m35, m36; 580bef36659SShri Abhyankar MatScalar m37, m38, m39, m40, m41, m42, m43, m44, m45, m46, m47, m48, m49; 581bef36659SShri Abhyankar MatScalar *ba = b->a, *aa = a->a; 582182b8fbaSHong Zhang PetscReal shift = info->shiftamount; 583a455e926SHong Zhang PetscBool allowzeropivot, zeropivotdetected; 584bef36659SShri Abhyankar 585bef36659SShri Abhyankar PetscFunctionBegin; 5860164db54SHong Zhang allowzeropivot = PetscNot(A->erroriffailure); 5879566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(49 * (n + 1), &rtmp)); 588bef36659SShri Abhyankar for (i = 0; i < n; i++) { 589bef36659SShri Abhyankar nz = bi[i + 1] - bi[i]; 590bef36659SShri Abhyankar ajtmp = bj + bi[i]; 591bef36659SShri Abhyankar for (j = 0; j < nz; j++) { 592bef36659SShri Abhyankar x = rtmp + 49 * ajtmp[j]; 593bef36659SShri Abhyankar x[0] = x[1] = x[2] = x[3] = x[4] = x[5] = x[6] = x[7] = x[8] = x[9] = 0.0; 594bef36659SShri Abhyankar x[10] = x[11] = x[12] = x[13] = x[14] = x[15] = x[16] = x[17] = 0.0; 595bef36659SShri Abhyankar x[18] = x[19] = x[20] = x[21] = x[22] = x[23] = x[24] = x[25] = 0.0; 596bef36659SShri Abhyankar x[26] = x[27] = x[28] = x[29] = x[30] = x[31] = x[32] = x[33] = 0.0; 597bef36659SShri Abhyankar x[34] = x[35] = x[36] = x[37] = x[38] = x[39] = x[40] = x[41] = 0.0; 598bef36659SShri Abhyankar x[42] = x[43] = x[44] = x[45] = x[46] = x[47] = x[48] = 0.0; 599bef36659SShri Abhyankar } 600bef36659SShri Abhyankar /* load in initial (unfactored row) */ 601bef36659SShri Abhyankar nz = ai[i + 1] - ai[i]; 602bef36659SShri Abhyankar ajtmpold = aj + ai[i]; 603bef36659SShri Abhyankar v = aa + 49 * ai[i]; 604bef36659SShri Abhyankar for (j = 0; j < nz; j++) { 605bef36659SShri Abhyankar x = rtmp + 49 * ajtmpold[j]; 6069371c9d4SSatish Balay x[0] = v[0]; 6079371c9d4SSatish Balay x[1] = v[1]; 6089371c9d4SSatish Balay x[2] = v[2]; 6099371c9d4SSatish Balay x[3] = v[3]; 6109371c9d4SSatish Balay x[4] = v[4]; 6119371c9d4SSatish Balay x[5] = v[5]; 6129371c9d4SSatish Balay x[6] = v[6]; 6139371c9d4SSatish Balay x[7] = v[7]; 6149371c9d4SSatish Balay x[8] = v[8]; 6159371c9d4SSatish Balay x[9] = v[9]; 6169371c9d4SSatish Balay x[10] = v[10]; 6179371c9d4SSatish Balay x[11] = v[11]; 6189371c9d4SSatish Balay x[12] = v[12]; 6199371c9d4SSatish Balay x[13] = v[13]; 6209371c9d4SSatish Balay x[14] = v[14]; 6219371c9d4SSatish Balay x[15] = v[15]; 6229371c9d4SSatish Balay x[16] = v[16]; 6239371c9d4SSatish Balay x[17] = v[17]; 6249371c9d4SSatish Balay x[18] = v[18]; 6259371c9d4SSatish Balay x[19] = v[19]; 6269371c9d4SSatish Balay x[20] = v[20]; 6279371c9d4SSatish Balay x[21] = v[21]; 6289371c9d4SSatish Balay x[22] = v[22]; 6299371c9d4SSatish Balay x[23] = v[23]; 6309371c9d4SSatish Balay x[24] = v[24]; 6319371c9d4SSatish Balay x[25] = v[25]; 6329371c9d4SSatish Balay x[26] = v[26]; 6339371c9d4SSatish Balay x[27] = v[27]; 6349371c9d4SSatish Balay x[28] = v[28]; 6359371c9d4SSatish Balay x[29] = v[29]; 6369371c9d4SSatish Balay x[30] = v[30]; 6379371c9d4SSatish Balay x[31] = v[31]; 6389371c9d4SSatish Balay x[32] = v[32]; 6399371c9d4SSatish Balay x[33] = v[33]; 6409371c9d4SSatish Balay x[34] = v[34]; 6419371c9d4SSatish Balay x[35] = v[35]; 6429371c9d4SSatish Balay x[36] = v[36]; 6439371c9d4SSatish Balay x[37] = v[37]; 6449371c9d4SSatish Balay x[38] = v[38]; 6459371c9d4SSatish Balay x[39] = v[39]; 6469371c9d4SSatish Balay x[40] = v[40]; 6479371c9d4SSatish Balay x[41] = v[41]; 6489371c9d4SSatish Balay x[42] = v[42]; 6499371c9d4SSatish Balay x[43] = v[43]; 6509371c9d4SSatish Balay x[44] = v[44]; 6519371c9d4SSatish Balay x[45] = v[45]; 6529371c9d4SSatish Balay x[46] = v[46]; 6539371c9d4SSatish Balay x[47] = v[47]; 654bef36659SShri Abhyankar x[48] = v[48]; 655bef36659SShri Abhyankar v += 49; 656bef36659SShri Abhyankar } 657bef36659SShri Abhyankar row = *ajtmp++; 658bef36659SShri Abhyankar while (row < i) { 659bef36659SShri Abhyankar pc = rtmp + 49 * row; 6609371c9d4SSatish Balay p1 = pc[0]; 6619371c9d4SSatish Balay p2 = pc[1]; 6629371c9d4SSatish Balay p3 = pc[2]; 6639371c9d4SSatish Balay p4 = pc[3]; 6649371c9d4SSatish Balay p5 = pc[4]; 6659371c9d4SSatish Balay p6 = pc[5]; 6669371c9d4SSatish Balay p7 = pc[6]; 6679371c9d4SSatish Balay p8 = pc[7]; 6689371c9d4SSatish Balay p9 = pc[8]; 6699371c9d4SSatish Balay p10 = pc[9]; 6709371c9d4SSatish Balay p11 = pc[10]; 6719371c9d4SSatish Balay p12 = pc[11]; 6729371c9d4SSatish Balay p13 = pc[12]; 6739371c9d4SSatish Balay p14 = pc[13]; 6749371c9d4SSatish Balay p15 = pc[14]; 6759371c9d4SSatish Balay p16 = pc[15]; 6769371c9d4SSatish Balay p17 = pc[16]; 6779371c9d4SSatish Balay p18 = pc[17]; 6789371c9d4SSatish Balay p19 = pc[18]; 6799371c9d4SSatish Balay p20 = pc[19]; 6809371c9d4SSatish Balay p21 = pc[20]; 6819371c9d4SSatish Balay p22 = pc[21]; 6829371c9d4SSatish Balay p23 = pc[22]; 6839371c9d4SSatish Balay p24 = pc[23]; 6849371c9d4SSatish Balay p25 = pc[24]; 6859371c9d4SSatish Balay p26 = pc[25]; 6869371c9d4SSatish Balay p27 = pc[26]; 6879371c9d4SSatish Balay p28 = pc[27]; 6889371c9d4SSatish Balay p29 = pc[28]; 6899371c9d4SSatish Balay p30 = pc[29]; 6909371c9d4SSatish Balay p31 = pc[30]; 6919371c9d4SSatish Balay p32 = pc[31]; 6929371c9d4SSatish Balay p33 = pc[32]; 6939371c9d4SSatish Balay p34 = pc[33]; 6949371c9d4SSatish Balay p35 = pc[34]; 6959371c9d4SSatish Balay p36 = pc[35]; 6969371c9d4SSatish Balay p37 = pc[36]; 6979371c9d4SSatish Balay p38 = pc[37]; 6989371c9d4SSatish Balay p39 = pc[38]; 6999371c9d4SSatish Balay p40 = pc[39]; 7009371c9d4SSatish Balay p41 = pc[40]; 7019371c9d4SSatish Balay p42 = pc[41]; 7029371c9d4SSatish Balay p43 = pc[42]; 7039371c9d4SSatish Balay p44 = pc[43]; 7049371c9d4SSatish Balay p45 = pc[44]; 7059371c9d4SSatish Balay p46 = pc[45]; 7069371c9d4SSatish Balay p47 = pc[46]; 7079371c9d4SSatish Balay p48 = pc[47]; 708bef36659SShri Abhyankar p49 = pc[48]; 7099371c9d4SSatish Balay if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0 || p5 != 0.0 || p6 != 0.0 || p7 != 0.0 || p8 != 0.0 || p9 != 0.0 || p10 != 0.0 || p11 != 0.0 || p12 != 0.0 || p13 != 0.0 || p14 != 0.0 || p15 != 0.0 || p16 != 0.0 || p17 != 0.0 || p18 != 0.0 || p19 != 0.0 || p20 != 0.0 || p21 != 0.0 || p22 != 0.0 || p23 != 0.0 || p24 != 0.0 || p25 != 0.0 || p26 != 0.0 || p27 != 0.0 || p28 != 0.0 || p29 != 0.0 || p30 != 0.0 || p31 != 0.0 || p32 != 0.0 || p33 != 0.0 || p34 != 0.0 || p35 != 0.0 || p36 != 0.0 || p37 != 0.0 || p38 != 0.0 || p39 != 0.0 || p40 != 0.0 || p41 != 0.0 || p42 != 0.0 || p43 != 0.0 || p44 != 0.0 || p45 != 0.0 || p46 != 0.0 || p47 != 0.0 || p48 != 0.0 || p49 != 0.0) { 710bef36659SShri Abhyankar pv = ba + 49 * diag_offset[row]; 711bef36659SShri Abhyankar pj = bj + diag_offset[row] + 1; 7129371c9d4SSatish Balay x1 = pv[0]; 7139371c9d4SSatish Balay x2 = pv[1]; 7149371c9d4SSatish Balay x3 = pv[2]; 7159371c9d4SSatish Balay x4 = pv[3]; 7169371c9d4SSatish Balay x5 = pv[4]; 7179371c9d4SSatish Balay x6 = pv[5]; 7189371c9d4SSatish Balay x7 = pv[6]; 7199371c9d4SSatish Balay x8 = pv[7]; 7209371c9d4SSatish Balay x9 = pv[8]; 7219371c9d4SSatish Balay x10 = pv[9]; 7229371c9d4SSatish Balay x11 = pv[10]; 7239371c9d4SSatish Balay x12 = pv[11]; 7249371c9d4SSatish Balay x13 = pv[12]; 7259371c9d4SSatish Balay x14 = pv[13]; 7269371c9d4SSatish Balay x15 = pv[14]; 7279371c9d4SSatish Balay x16 = pv[15]; 7289371c9d4SSatish Balay x17 = pv[16]; 7299371c9d4SSatish Balay x18 = pv[17]; 7309371c9d4SSatish Balay x19 = pv[18]; 7319371c9d4SSatish Balay x20 = pv[19]; 7329371c9d4SSatish Balay x21 = pv[20]; 7339371c9d4SSatish Balay x22 = pv[21]; 7349371c9d4SSatish Balay x23 = pv[22]; 7359371c9d4SSatish Balay x24 = pv[23]; 7369371c9d4SSatish Balay x25 = pv[24]; 7379371c9d4SSatish Balay x26 = pv[25]; 7389371c9d4SSatish Balay x27 = pv[26]; 7399371c9d4SSatish Balay x28 = pv[27]; 7409371c9d4SSatish Balay x29 = pv[28]; 7419371c9d4SSatish Balay x30 = pv[29]; 7429371c9d4SSatish Balay x31 = pv[30]; 7439371c9d4SSatish Balay x32 = pv[31]; 7449371c9d4SSatish Balay x33 = pv[32]; 7459371c9d4SSatish Balay x34 = pv[33]; 7469371c9d4SSatish Balay x35 = pv[34]; 7479371c9d4SSatish Balay x36 = pv[35]; 7489371c9d4SSatish Balay x37 = pv[36]; 7499371c9d4SSatish Balay x38 = pv[37]; 7509371c9d4SSatish Balay x39 = pv[38]; 7519371c9d4SSatish Balay x40 = pv[39]; 7529371c9d4SSatish Balay x41 = pv[40]; 7539371c9d4SSatish Balay x42 = pv[41]; 7549371c9d4SSatish Balay x43 = pv[42]; 7559371c9d4SSatish Balay x44 = pv[43]; 7569371c9d4SSatish Balay x45 = pv[44]; 7579371c9d4SSatish Balay x46 = pv[45]; 7589371c9d4SSatish Balay x47 = pv[46]; 7599371c9d4SSatish Balay x48 = pv[47]; 760bef36659SShri Abhyankar x49 = pv[48]; 761bef36659SShri Abhyankar pc[0] = m1 = p1 * x1 + p8 * x2 + p15 * x3 + p22 * x4 + p29 * x5 + p36 * x6 + p43 * x7; 762bef36659SShri Abhyankar pc[1] = m2 = p2 * x1 + p9 * x2 + p16 * x3 + p23 * x4 + p30 * x5 + p37 * x6 + p44 * x7; 763bef36659SShri Abhyankar pc[2] = m3 = p3 * x1 + p10 * x2 + p17 * x3 + p24 * x4 + p31 * x5 + p38 * x6 + p45 * x7; 764bef36659SShri Abhyankar pc[3] = m4 = p4 * x1 + p11 * x2 + p18 * x3 + p25 * x4 + p32 * x5 + p39 * x6 + p46 * x7; 765bef36659SShri Abhyankar pc[4] = m5 = p5 * x1 + p12 * x2 + p19 * x3 + p26 * x4 + p33 * x5 + p40 * x6 + p47 * x7; 766bef36659SShri Abhyankar pc[5] = m6 = p6 * x1 + p13 * x2 + p20 * x3 + p27 * x4 + p34 * x5 + p41 * x6 + p48 * x7; 767bef36659SShri Abhyankar pc[6] = m7 = p7 * x1 + p14 * x2 + p21 * x3 + p28 * x4 + p35 * x5 + p42 * x6 + p49 * x7; 768bef36659SShri Abhyankar 769bef36659SShri Abhyankar pc[7] = m8 = p1 * x8 + p8 * x9 + p15 * x10 + p22 * x11 + p29 * x12 + p36 * x13 + p43 * x14; 770bef36659SShri Abhyankar pc[8] = m9 = p2 * x8 + p9 * x9 + p16 * x10 + p23 * x11 + p30 * x12 + p37 * x13 + p44 * x14; 771bef36659SShri Abhyankar pc[9] = m10 = p3 * x8 + p10 * x9 + p17 * x10 + p24 * x11 + p31 * x12 + p38 * x13 + p45 * x14; 772bef36659SShri Abhyankar pc[10] = m11 = p4 * x8 + p11 * x9 + p18 * x10 + p25 * x11 + p32 * x12 + p39 * x13 + p46 * x14; 773bef36659SShri Abhyankar pc[11] = m12 = p5 * x8 + p12 * x9 + p19 * x10 + p26 * x11 + p33 * x12 + p40 * x13 + p47 * x14; 774bef36659SShri Abhyankar pc[12] = m13 = p6 * x8 + p13 * x9 + p20 * x10 + p27 * x11 + p34 * x12 + p41 * x13 + p48 * x14; 775bef36659SShri Abhyankar pc[13] = m14 = p7 * x8 + p14 * x9 + p21 * x10 + p28 * x11 + p35 * x12 + p42 * x13 + p49 * x14; 776bef36659SShri Abhyankar 777bef36659SShri Abhyankar pc[14] = m15 = p1 * x15 + p8 * x16 + p15 * x17 + p22 * x18 + p29 * x19 + p36 * x20 + p43 * x21; 778bef36659SShri Abhyankar pc[15] = m16 = p2 * x15 + p9 * x16 + p16 * x17 + p23 * x18 + p30 * x19 + p37 * x20 + p44 * x21; 779bef36659SShri Abhyankar pc[16] = m17 = p3 * x15 + p10 * x16 + p17 * x17 + p24 * x18 + p31 * x19 + p38 * x20 + p45 * x21; 780bef36659SShri Abhyankar pc[17] = m18 = p4 * x15 + p11 * x16 + p18 * x17 + p25 * x18 + p32 * x19 + p39 * x20 + p46 * x21; 781bef36659SShri Abhyankar pc[18] = m19 = p5 * x15 + p12 * x16 + p19 * x17 + p26 * x18 + p33 * x19 + p40 * x20 + p47 * x21; 782bef36659SShri Abhyankar pc[19] = m20 = p6 * x15 + p13 * x16 + p20 * x17 + p27 * x18 + p34 * x19 + p41 * x20 + p48 * x21; 783bef36659SShri Abhyankar pc[20] = m21 = p7 * x15 + p14 * x16 + p21 * x17 + p28 * x18 + p35 * x19 + p42 * x20 + p49 * x21; 784bef36659SShri Abhyankar 785bef36659SShri Abhyankar pc[21] = m22 = p1 * x22 + p8 * x23 + p15 * x24 + p22 * x25 + p29 * x26 + p36 * x27 + p43 * x28; 786bef36659SShri Abhyankar pc[22] = m23 = p2 * x22 + p9 * x23 + p16 * x24 + p23 * x25 + p30 * x26 + p37 * x27 + p44 * x28; 787bef36659SShri Abhyankar pc[23] = m24 = p3 * x22 + p10 * x23 + p17 * x24 + p24 * x25 + p31 * x26 + p38 * x27 + p45 * x28; 788bef36659SShri Abhyankar pc[24] = m25 = p4 * x22 + p11 * x23 + p18 * x24 + p25 * x25 + p32 * x26 + p39 * x27 + p46 * x28; 789bef36659SShri Abhyankar pc[25] = m26 = p5 * x22 + p12 * x23 + p19 * x24 + p26 * x25 + p33 * x26 + p40 * x27 + p47 * x28; 790bef36659SShri Abhyankar pc[26] = m27 = p6 * x22 + p13 * x23 + p20 * x24 + p27 * x25 + p34 * x26 + p41 * x27 + p48 * x28; 791bef36659SShri Abhyankar pc[27] = m28 = p7 * x22 + p14 * x23 + p21 * x24 + p28 * x25 + p35 * x26 + p42 * x27 + p49 * x28; 792bef36659SShri Abhyankar 793bef36659SShri Abhyankar pc[28] = m29 = p1 * x29 + p8 * x30 + p15 * x31 + p22 * x32 + p29 * x33 + p36 * x34 + p43 * x35; 794bef36659SShri Abhyankar pc[29] = m30 = p2 * x29 + p9 * x30 + p16 * x31 + p23 * x32 + p30 * x33 + p37 * x34 + p44 * x35; 795bef36659SShri Abhyankar pc[30] = m31 = p3 * x29 + p10 * x30 + p17 * x31 + p24 * x32 + p31 * x33 + p38 * x34 + p45 * x35; 796bef36659SShri Abhyankar pc[31] = m32 = p4 * x29 + p11 * x30 + p18 * x31 + p25 * x32 + p32 * x33 + p39 * x34 + p46 * x35; 797bef36659SShri Abhyankar pc[32] = m33 = p5 * x29 + p12 * x30 + p19 * x31 + p26 * x32 + p33 * x33 + p40 * x34 + p47 * x35; 798bef36659SShri Abhyankar pc[33] = m34 = p6 * x29 + p13 * x30 + p20 * x31 + p27 * x32 + p34 * x33 + p41 * x34 + p48 * x35; 799bef36659SShri Abhyankar pc[34] = m35 = p7 * x29 + p14 * x30 + p21 * x31 + p28 * x32 + p35 * x33 + p42 * x34 + p49 * x35; 800bef36659SShri Abhyankar 801bef36659SShri Abhyankar pc[35] = m36 = p1 * x36 + p8 * x37 + p15 * x38 + p22 * x39 + p29 * x40 + p36 * x41 + p43 * x42; 802bef36659SShri Abhyankar pc[36] = m37 = p2 * x36 + p9 * x37 + p16 * x38 + p23 * x39 + p30 * x40 + p37 * x41 + p44 * x42; 803bef36659SShri Abhyankar pc[37] = m38 = p3 * x36 + p10 * x37 + p17 * x38 + p24 * x39 + p31 * x40 + p38 * x41 + p45 * x42; 804bef36659SShri Abhyankar pc[38] = m39 = p4 * x36 + p11 * x37 + p18 * x38 + p25 * x39 + p32 * x40 + p39 * x41 + p46 * x42; 805bef36659SShri Abhyankar pc[39] = m40 = p5 * x36 + p12 * x37 + p19 * x38 + p26 * x39 + p33 * x40 + p40 * x41 + p47 * x42; 806bef36659SShri Abhyankar pc[40] = m41 = p6 * x36 + p13 * x37 + p20 * x38 + p27 * x39 + p34 * x40 + p41 * x41 + p48 * x42; 807bef36659SShri Abhyankar pc[41] = m42 = p7 * x36 + p14 * x37 + p21 * x38 + p28 * x39 + p35 * x40 + p42 * x41 + p49 * x42; 808bef36659SShri Abhyankar 809bef36659SShri Abhyankar pc[42] = m43 = p1 * x43 + p8 * x44 + p15 * x45 + p22 * x46 + p29 * x47 + p36 * x48 + p43 * x49; 810bef36659SShri Abhyankar pc[43] = m44 = p2 * x43 + p9 * x44 + p16 * x45 + p23 * x46 + p30 * x47 + p37 * x48 + p44 * x49; 811bef36659SShri Abhyankar pc[44] = m45 = p3 * x43 + p10 * x44 + p17 * x45 + p24 * x46 + p31 * x47 + p38 * x48 + p45 * x49; 812bef36659SShri Abhyankar pc[45] = m46 = p4 * x43 + p11 * x44 + p18 * x45 + p25 * x46 + p32 * x47 + p39 * x48 + p46 * x49; 813bef36659SShri Abhyankar pc[46] = m47 = p5 * x43 + p12 * x44 + p19 * x45 + p26 * x46 + p33 * x47 + p40 * x48 + p47 * x49; 814bef36659SShri Abhyankar pc[47] = m48 = p6 * x43 + p13 * x44 + p20 * x45 + p27 * x46 + p34 * x47 + p41 * x48 + p48 * x49; 815bef36659SShri Abhyankar pc[48] = m49 = p7 * x43 + p14 * x44 + p21 * x45 + p28 * x46 + p35 * x47 + p42 * x48 + p49 * x49; 816bef36659SShri Abhyankar 817bef36659SShri Abhyankar nz = bi[row + 1] - diag_offset[row] - 1; 818bef36659SShri Abhyankar pv += 49; 819bef36659SShri Abhyankar for (j = 0; j < nz; j++) { 8209371c9d4SSatish Balay x1 = pv[0]; 8219371c9d4SSatish Balay x2 = pv[1]; 8229371c9d4SSatish Balay x3 = pv[2]; 8239371c9d4SSatish Balay x4 = pv[3]; 8249371c9d4SSatish Balay x5 = pv[4]; 8259371c9d4SSatish Balay x6 = pv[5]; 8269371c9d4SSatish Balay x7 = pv[6]; 8279371c9d4SSatish Balay x8 = pv[7]; 8289371c9d4SSatish Balay x9 = pv[8]; 8299371c9d4SSatish Balay x10 = pv[9]; 8309371c9d4SSatish Balay x11 = pv[10]; 8319371c9d4SSatish Balay x12 = pv[11]; 8329371c9d4SSatish Balay x13 = pv[12]; 8339371c9d4SSatish Balay x14 = pv[13]; 8349371c9d4SSatish Balay x15 = pv[14]; 8359371c9d4SSatish Balay x16 = pv[15]; 8369371c9d4SSatish Balay x17 = pv[16]; 8379371c9d4SSatish Balay x18 = pv[17]; 8389371c9d4SSatish Balay x19 = pv[18]; 8399371c9d4SSatish Balay x20 = pv[19]; 8409371c9d4SSatish Balay x21 = pv[20]; 8419371c9d4SSatish Balay x22 = pv[21]; 8429371c9d4SSatish Balay x23 = pv[22]; 8439371c9d4SSatish Balay x24 = pv[23]; 8449371c9d4SSatish Balay x25 = pv[24]; 8459371c9d4SSatish Balay x26 = pv[25]; 8469371c9d4SSatish Balay x27 = pv[26]; 8479371c9d4SSatish Balay x28 = pv[27]; 8489371c9d4SSatish Balay x29 = pv[28]; 8499371c9d4SSatish Balay x30 = pv[29]; 8509371c9d4SSatish Balay x31 = pv[30]; 8519371c9d4SSatish Balay x32 = pv[31]; 8529371c9d4SSatish Balay x33 = pv[32]; 8539371c9d4SSatish Balay x34 = pv[33]; 8549371c9d4SSatish Balay x35 = pv[34]; 8559371c9d4SSatish Balay x36 = pv[35]; 8569371c9d4SSatish Balay x37 = pv[36]; 8579371c9d4SSatish Balay x38 = pv[37]; 8589371c9d4SSatish Balay x39 = pv[38]; 8599371c9d4SSatish Balay x40 = pv[39]; 8609371c9d4SSatish Balay x41 = pv[40]; 8619371c9d4SSatish Balay x42 = pv[41]; 8629371c9d4SSatish Balay x43 = pv[42]; 8639371c9d4SSatish Balay x44 = pv[43]; 8649371c9d4SSatish Balay x45 = pv[44]; 8659371c9d4SSatish Balay x46 = pv[45]; 8669371c9d4SSatish Balay x47 = pv[46]; 8679371c9d4SSatish Balay x48 = pv[47]; 868bef36659SShri Abhyankar x49 = pv[48]; 869bef36659SShri Abhyankar x = rtmp + 49 * pj[j]; 870bef36659SShri Abhyankar x[0] -= m1 * x1 + m8 * x2 + m15 * x3 + m22 * x4 + m29 * x5 + m36 * x6 + m43 * x7; 871bef36659SShri Abhyankar x[1] -= m2 * x1 + m9 * x2 + m16 * x3 + m23 * x4 + m30 * x5 + m37 * x6 + m44 * x7; 872bef36659SShri Abhyankar x[2] -= m3 * x1 + m10 * x2 + m17 * x3 + m24 * x4 + m31 * x5 + m38 * x6 + m45 * x7; 873bef36659SShri Abhyankar x[3] -= m4 * x1 + m11 * x2 + m18 * x3 + m25 * x4 + m32 * x5 + m39 * x6 + m46 * x7; 874bef36659SShri Abhyankar x[4] -= m5 * x1 + m12 * x2 + m19 * x3 + m26 * x4 + m33 * x5 + m40 * x6 + m47 * x7; 875bef36659SShri Abhyankar x[5] -= m6 * x1 + m13 * x2 + m20 * x3 + m27 * x4 + m34 * x5 + m41 * x6 + m48 * x7; 876bef36659SShri Abhyankar x[6] -= m7 * x1 + m14 * x2 + m21 * x3 + m28 * x4 + m35 * x5 + m42 * x6 + m49 * x7; 877bef36659SShri Abhyankar 878bef36659SShri Abhyankar x[7] -= m1 * x8 + m8 * x9 + m15 * x10 + m22 * x11 + m29 * x12 + m36 * x13 + m43 * x14; 879bef36659SShri Abhyankar x[8] -= m2 * x8 + m9 * x9 + m16 * x10 + m23 * x11 + m30 * x12 + m37 * x13 + m44 * x14; 880bef36659SShri Abhyankar x[9] -= m3 * x8 + m10 * x9 + m17 * x10 + m24 * x11 + m31 * x12 + m38 * x13 + m45 * x14; 881bef36659SShri Abhyankar x[10] -= m4 * x8 + m11 * x9 + m18 * x10 + m25 * x11 + m32 * x12 + m39 * x13 + m46 * x14; 882bef36659SShri Abhyankar x[11] -= m5 * x8 + m12 * x9 + m19 * x10 + m26 * x11 + m33 * x12 + m40 * x13 + m47 * x14; 883bef36659SShri Abhyankar x[12] -= m6 * x8 + m13 * x9 + m20 * x10 + m27 * x11 + m34 * x12 + m41 * x13 + m48 * x14; 884bef36659SShri Abhyankar x[13] -= m7 * x8 + m14 * x9 + m21 * x10 + m28 * x11 + m35 * x12 + m42 * x13 + m49 * x14; 885bef36659SShri Abhyankar 886bef36659SShri Abhyankar x[14] -= m1 * x15 + m8 * x16 + m15 * x17 + m22 * x18 + m29 * x19 + m36 * x20 + m43 * x21; 887bef36659SShri Abhyankar x[15] -= m2 * x15 + m9 * x16 + m16 * x17 + m23 * x18 + m30 * x19 + m37 * x20 + m44 * x21; 888bef36659SShri Abhyankar x[16] -= m3 * x15 + m10 * x16 + m17 * x17 + m24 * x18 + m31 * x19 + m38 * x20 + m45 * x21; 889bef36659SShri Abhyankar x[17] -= m4 * x15 + m11 * x16 + m18 * x17 + m25 * x18 + m32 * x19 + m39 * x20 + m46 * x21; 890bef36659SShri Abhyankar x[18] -= m5 * x15 + m12 * x16 + m19 * x17 + m26 * x18 + m33 * x19 + m40 * x20 + m47 * x21; 891bef36659SShri Abhyankar x[19] -= m6 * x15 + m13 * x16 + m20 * x17 + m27 * x18 + m34 * x19 + m41 * x20 + m48 * x21; 892bef36659SShri Abhyankar x[20] -= m7 * x15 + m14 * x16 + m21 * x17 + m28 * x18 + m35 * x19 + m42 * x20 + m49 * x21; 893bef36659SShri Abhyankar 894bef36659SShri Abhyankar x[21] -= m1 * x22 + m8 * x23 + m15 * x24 + m22 * x25 + m29 * x26 + m36 * x27 + m43 * x28; 895bef36659SShri Abhyankar x[22] -= m2 * x22 + m9 * x23 + m16 * x24 + m23 * x25 + m30 * x26 + m37 * x27 + m44 * x28; 896bef36659SShri Abhyankar x[23] -= m3 * x22 + m10 * x23 + m17 * x24 + m24 * x25 + m31 * x26 + m38 * x27 + m45 * x28; 897bef36659SShri Abhyankar x[24] -= m4 * x22 + m11 * x23 + m18 * x24 + m25 * x25 + m32 * x26 + m39 * x27 + m46 * x28; 898bef36659SShri Abhyankar x[25] -= m5 * x22 + m12 * x23 + m19 * x24 + m26 * x25 + m33 * x26 + m40 * x27 + m47 * x28; 899bef36659SShri Abhyankar x[26] -= m6 * x22 + m13 * x23 + m20 * x24 + m27 * x25 + m34 * x26 + m41 * x27 + m48 * x28; 900bef36659SShri Abhyankar x[27] -= m7 * x22 + m14 * x23 + m21 * x24 + m28 * x25 + m35 * x26 + m42 * x27 + m49 * x28; 901bef36659SShri Abhyankar 902bef36659SShri Abhyankar x[28] -= m1 * x29 + m8 * x30 + m15 * x31 + m22 * x32 + m29 * x33 + m36 * x34 + m43 * x35; 903bef36659SShri Abhyankar x[29] -= m2 * x29 + m9 * x30 + m16 * x31 + m23 * x32 + m30 * x33 + m37 * x34 + m44 * x35; 904bef36659SShri Abhyankar x[30] -= m3 * x29 + m10 * x30 + m17 * x31 + m24 * x32 + m31 * x33 + m38 * x34 + m45 * x35; 905bef36659SShri Abhyankar x[31] -= m4 * x29 + m11 * x30 + m18 * x31 + m25 * x32 + m32 * x33 + m39 * x34 + m46 * x35; 906bef36659SShri Abhyankar x[32] -= m5 * x29 + m12 * x30 + m19 * x31 + m26 * x32 + m33 * x33 + m40 * x34 + m47 * x35; 907bef36659SShri Abhyankar x[33] -= m6 * x29 + m13 * x30 + m20 * x31 + m27 * x32 + m34 * x33 + m41 * x34 + m48 * x35; 908bef36659SShri Abhyankar x[34] -= m7 * x29 + m14 * x30 + m21 * x31 + m28 * x32 + m35 * x33 + m42 * x34 + m49 * x35; 909bef36659SShri Abhyankar 910bef36659SShri Abhyankar x[35] -= m1 * x36 + m8 * x37 + m15 * x38 + m22 * x39 + m29 * x40 + m36 * x41 + m43 * x42; 911bef36659SShri Abhyankar x[36] -= m2 * x36 + m9 * x37 + m16 * x38 + m23 * x39 + m30 * x40 + m37 * x41 + m44 * x42; 912bef36659SShri Abhyankar x[37] -= m3 * x36 + m10 * x37 + m17 * x38 + m24 * x39 + m31 * x40 + m38 * x41 + m45 * x42; 913bef36659SShri Abhyankar x[38] -= m4 * x36 + m11 * x37 + m18 * x38 + m25 * x39 + m32 * x40 + m39 * x41 + m46 * x42; 914bef36659SShri Abhyankar x[39] -= m5 * x36 + m12 * x37 + m19 * x38 + m26 * x39 + m33 * x40 + m40 * x41 + m47 * x42; 915bef36659SShri Abhyankar x[40] -= m6 * x36 + m13 * x37 + m20 * x38 + m27 * x39 + m34 * x40 + m41 * x41 + m48 * x42; 916bef36659SShri Abhyankar x[41] -= m7 * x36 + m14 * x37 + m21 * x38 + m28 * x39 + m35 * x40 + m42 * x41 + m49 * x42; 917bef36659SShri Abhyankar 918bef36659SShri Abhyankar x[42] -= m1 * x43 + m8 * x44 + m15 * x45 + m22 * x46 + m29 * x47 + m36 * x48 + m43 * x49; 919bef36659SShri Abhyankar x[43] -= m2 * x43 + m9 * x44 + m16 * x45 + m23 * x46 + m30 * x47 + m37 * x48 + m44 * x49; 920bef36659SShri Abhyankar x[44] -= m3 * x43 + m10 * x44 + m17 * x45 + m24 * x46 + m31 * x47 + m38 * x48 + m45 * x49; 921bef36659SShri Abhyankar x[45] -= m4 * x43 + m11 * x44 + m18 * x45 + m25 * x46 + m32 * x47 + m39 * x48 + m46 * x49; 922bef36659SShri Abhyankar x[46] -= m5 * x43 + m12 * x44 + m19 * x45 + m26 * x46 + m33 * x47 + m40 * x48 + m47 * x49; 923bef36659SShri Abhyankar x[47] -= m6 * x43 + m13 * x44 + m20 * x45 + m27 * x46 + m34 * x47 + m41 * x48 + m48 * x49; 924bef36659SShri Abhyankar x[48] -= m7 * x43 + m14 * x44 + m21 * x45 + m28 * x46 + m35 * x47 + m42 * x48 + m49 * x49; 925bef36659SShri Abhyankar pv += 49; 926bef36659SShri Abhyankar } 9279566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(686.0 * nz + 637.0)); 928bef36659SShri Abhyankar } 929bef36659SShri Abhyankar row = *ajtmp++; 930bef36659SShri Abhyankar } 931bef36659SShri Abhyankar /* finished row so stick it into b->a */ 932bef36659SShri Abhyankar pv = ba + 49 * bi[i]; 933bef36659SShri Abhyankar pj = bj + bi[i]; 934bef36659SShri Abhyankar nz = bi[i + 1] - bi[i]; 935bef36659SShri Abhyankar for (j = 0; j < nz; j++) { 936bef36659SShri Abhyankar x = rtmp + 49 * pj[j]; 9379371c9d4SSatish Balay pv[0] = x[0]; 9389371c9d4SSatish Balay pv[1] = x[1]; 9399371c9d4SSatish Balay pv[2] = x[2]; 9409371c9d4SSatish Balay pv[3] = x[3]; 9419371c9d4SSatish Balay pv[4] = x[4]; 9429371c9d4SSatish Balay pv[5] = x[5]; 9439371c9d4SSatish Balay pv[6] = x[6]; 9449371c9d4SSatish Balay pv[7] = x[7]; 9459371c9d4SSatish Balay pv[8] = x[8]; 9469371c9d4SSatish Balay pv[9] = x[9]; 9479371c9d4SSatish Balay pv[10] = x[10]; 9489371c9d4SSatish Balay pv[11] = x[11]; 9499371c9d4SSatish Balay pv[12] = x[12]; 9509371c9d4SSatish Balay pv[13] = x[13]; 9519371c9d4SSatish Balay pv[14] = x[14]; 9529371c9d4SSatish Balay pv[15] = x[15]; 9539371c9d4SSatish Balay pv[16] = x[16]; 9549371c9d4SSatish Balay pv[17] = x[17]; 9559371c9d4SSatish Balay pv[18] = x[18]; 9569371c9d4SSatish Balay pv[19] = x[19]; 9579371c9d4SSatish Balay pv[20] = x[20]; 9589371c9d4SSatish Balay pv[21] = x[21]; 9599371c9d4SSatish Balay pv[22] = x[22]; 9609371c9d4SSatish Balay pv[23] = x[23]; 9619371c9d4SSatish Balay pv[24] = x[24]; 9629371c9d4SSatish Balay pv[25] = x[25]; 9639371c9d4SSatish Balay pv[26] = x[26]; 9649371c9d4SSatish Balay pv[27] = x[27]; 9659371c9d4SSatish Balay pv[28] = x[28]; 9669371c9d4SSatish Balay pv[29] = x[29]; 9679371c9d4SSatish Balay pv[30] = x[30]; 9689371c9d4SSatish Balay pv[31] = x[31]; 9699371c9d4SSatish Balay pv[32] = x[32]; 9709371c9d4SSatish Balay pv[33] = x[33]; 9719371c9d4SSatish Balay pv[34] = x[34]; 9729371c9d4SSatish Balay pv[35] = x[35]; 9739371c9d4SSatish Balay pv[36] = x[36]; 9749371c9d4SSatish Balay pv[37] = x[37]; 9759371c9d4SSatish Balay pv[38] = x[38]; 9769371c9d4SSatish Balay pv[39] = x[39]; 9779371c9d4SSatish Balay pv[40] = x[40]; 9789371c9d4SSatish Balay pv[41] = x[41]; 9799371c9d4SSatish Balay pv[42] = x[42]; 9809371c9d4SSatish Balay pv[43] = x[43]; 9819371c9d4SSatish Balay pv[44] = x[44]; 9829371c9d4SSatish Balay pv[45] = x[45]; 9839371c9d4SSatish Balay pv[46] = x[46]; 9849371c9d4SSatish Balay pv[47] = x[47]; 985bef36659SShri Abhyankar pv[48] = x[48]; 986bef36659SShri Abhyankar pv += 49; 987bef36659SShri Abhyankar } 988bef36659SShri Abhyankar /* invert diagonal block */ 989bef36659SShri Abhyankar w = ba + 49 * diag_offset[i]; 9909566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_7(w, shift, allowzeropivot, &zeropivotdetected)); 9917b6c816cSBarry Smith if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 992bef36659SShri Abhyankar } 993bef36659SShri Abhyankar 9949566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 99526fbe8dcSKarl Rupp 99606e38f1dSHong Zhang C->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering_inplace; 99706e38f1dSHong Zhang C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7_NaturalOrdering_inplace; 998bef36659SShri Abhyankar C->assembled = PETSC_TRUE; 99926fbe8dcSKarl Rupp 10009566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.333333333333 * 7 * 7 * 7 * b->mbs)); /* from inverting diagonal blocks */ 1001bef36659SShri Abhyankar PetscFunctionReturn(0); 1002bef36659SShri Abhyankar } 1003bef36659SShri Abhyankar 1004*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info) 1005*d71ae5a4SJacob Faibussowitsch { 1006bef36659SShri Abhyankar Mat C = B; 1007bef36659SShri Abhyankar Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data; 1008bbd65245SShri Abhyankar PetscInt i, j, k, nz, nzL, row; 1009bbd65245SShri Abhyankar const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 1010bbd65245SShri Abhyankar const PetscInt *ajtmp, *bjtmp, *bdiag = b->diag, *pj, bs2 = a->bs2; 1011bef36659SShri Abhyankar MatScalar *rtmp, *pc, *mwork, *v, *pv, *aa = a->a; 1012bbd65245SShri Abhyankar PetscInt flg; 1013182b8fbaSHong Zhang PetscReal shift = info->shiftamount; 1014a455e926SHong Zhang PetscBool allowzeropivot, zeropivotdetected; 1015bef36659SShri Abhyankar 1016bef36659SShri Abhyankar PetscFunctionBegin; 10170164db54SHong Zhang allowzeropivot = PetscNot(A->erroriffailure); 10180164db54SHong Zhang 1019bef36659SShri Abhyankar /* generate work space needed by the factorization */ 10209566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs2 * n, &rtmp, bs2, &mwork)); 10219566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(rtmp, bs2 * n)); 1022bef36659SShri Abhyankar 1023bef36659SShri Abhyankar for (i = 0; i < n; i++) { 1024bef36659SShri Abhyankar /* zero rtmp */ 1025bef36659SShri Abhyankar /* L part */ 1026bef36659SShri Abhyankar nz = bi[i + 1] - bi[i]; 1027bef36659SShri Abhyankar bjtmp = bj + bi[i]; 102848a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2)); 1029bef36659SShri Abhyankar 1030bef36659SShri Abhyankar /* U part */ 103153cca76cSShri Abhyankar nz = bdiag[i] - bdiag[i + 1]; 103253cca76cSShri Abhyankar bjtmp = bj + bdiag[i + 1] + 1; 103348a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2)); 103453cca76cSShri Abhyankar 103553cca76cSShri Abhyankar /* load in initial (unfactored row) */ 103653cca76cSShri Abhyankar nz = ai[i + 1] - ai[i]; 103753cca76cSShri Abhyankar ajtmp = aj + ai[i]; 103853cca76cSShri Abhyankar v = aa + bs2 * ai[i]; 103948a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(rtmp + bs2 * ajtmp[j], v + bs2 * j, bs2)); 104053cca76cSShri Abhyankar 104153cca76cSShri Abhyankar /* elimination */ 104253cca76cSShri Abhyankar bjtmp = bj + bi[i]; 104353cca76cSShri Abhyankar nzL = bi[i + 1] - bi[i]; 104453cca76cSShri Abhyankar for (k = 0; k < nzL; k++) { 104553cca76cSShri Abhyankar row = bjtmp[k]; 104653cca76cSShri Abhyankar pc = rtmp + bs2 * row; 1047c35f09e5SBarry Smith for (flg = 0, j = 0; j < bs2; j++) { 1048c35f09e5SBarry Smith if (pc[j] != 0.0) { 1049c35f09e5SBarry Smith flg = 1; 1050c35f09e5SBarry Smith break; 1051c35f09e5SBarry Smith } 1052c35f09e5SBarry Smith } 105353cca76cSShri Abhyankar if (flg) { 105453cca76cSShri Abhyankar pv = b->a + bs2 * bdiag[row]; 105596b95a6bSBarry Smith /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */ 10569566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_A_times_B_7(pc, pv, mwork)); 105753cca76cSShri Abhyankar 1058a5b23f4aSJose E. Roman pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 105953cca76cSShri Abhyankar pv = b->a + bs2 * (bdiag[row + 1] + 1); 106053cca76cSShri Abhyankar nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries inU(row,:), excluding diag */ 106153cca76cSShri Abhyankar for (j = 0; j < nz; j++) { 106296b95a6bSBarry Smith /* PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */ 106353cca76cSShri Abhyankar /* rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */ 106453cca76cSShri Abhyankar v = rtmp + bs2 * pj[j]; 10659566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_A_minus_B_times_C_7(v, pc, pv)); 106653cca76cSShri Abhyankar pv += bs2; 106753cca76cSShri Abhyankar } 10689566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(686.0 * nz + 637)); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ 106953cca76cSShri Abhyankar } 107053cca76cSShri Abhyankar } 107153cca76cSShri Abhyankar 107253cca76cSShri Abhyankar /* finished row so stick it into b->a */ 107353cca76cSShri Abhyankar /* L part */ 107453cca76cSShri Abhyankar pv = b->a + bs2 * bi[i]; 107553cca76cSShri Abhyankar pj = b->j + bi[i]; 107653cca76cSShri Abhyankar nz = bi[i + 1] - bi[i]; 107748a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2)); 107853cca76cSShri Abhyankar 1079a5b23f4aSJose E. Roman /* Mark diagonal and invert diagonal for simpler triangular solves */ 108053cca76cSShri Abhyankar pv = b->a + bs2 * bdiag[i]; 108153cca76cSShri Abhyankar pj = b->j + bdiag[i]; 10829566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(pv, rtmp + bs2 * pj[0], bs2)); 10839566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_7(pv, shift, allowzeropivot, &zeropivotdetected)); 10847b6c816cSBarry Smith if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 108553cca76cSShri Abhyankar 108653cca76cSShri Abhyankar /* U part */ 108753cca76cSShri Abhyankar pv = b->a + bs2 * (bdiag[i + 1] + 1); 108853cca76cSShri Abhyankar pj = b->j + bdiag[i + 1] + 1; 108953cca76cSShri Abhyankar nz = bdiag[i] - bdiag[i + 1] - 1; 109048a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2)); 109153cca76cSShri Abhyankar } 10929566063dSJacob Faibussowitsch PetscCall(PetscFree2(rtmp, mwork)); 109326fbe8dcSKarl Rupp 10944dd39f65SShri Abhyankar C->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering; 10954dd39f65SShri Abhyankar C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7_NaturalOrdering; 109653cca76cSShri Abhyankar C->assembled = PETSC_TRUE; 109726fbe8dcSKarl Rupp 10989566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.333333333333 * 7 * 7 * 7 * n)); /* from inverting diagonal blocks */ 109953cca76cSShri Abhyankar PetscFunctionReturn(0); 110053cca76cSShri Abhyankar } 1101