1be1d678aSKris Buschelman 283287d42SBarry Smith /* 383287d42SBarry Smith Factorization code for BAIJ format. 483287d42SBarry Smith */ 5c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> 6af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h> 783287d42SBarry Smith /* 883287d42SBarry Smith Version for when blocks are 7 by 7 983287d42SBarry Smith */ 109371c9d4SSatish Balay PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7_inplace(Mat C, Mat A, const MatFactorInfo *info) { 1183287d42SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data; 1283287d42SBarry Smith IS isrow = b->row, isicol = b->icol; 135d0c19d7SBarry Smith const PetscInt *r, *ic, *bi = b->i, *bj = b->j, *ajtmp, *diag_offset = b->diag, *ai = a->i, *aj = a->j, *pj, *ajtmpold; 145d0c19d7SBarry Smith PetscInt i, j, n = a->mbs, nz, row, idx; 1583287d42SBarry Smith MatScalar *pv, *v, *rtmp, *pc, *w, *x; 1683287d42SBarry Smith MatScalar p1, p2, p3, p4, m1, m2, m3, m4, m5, m6, m7, m8, m9, x1, x2, x3, x4; 1783287d42SBarry Smith MatScalar p5, p6, p7, p8, p9, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16; 1883287d42SBarry Smith MatScalar x17, x18, x19, x20, x21, x22, x23, x24, x25, p10, p11, p12, p13, p14; 1983287d42SBarry Smith MatScalar p15, p16, p17, p18, p19, p20, p21, p22, p23, p24, p25, m10, m11, m12; 2083287d42SBarry Smith MatScalar m13, m14, m15, m16, m17, m18, m19, m20, m21, m22, m23, m24, m25; 2183287d42SBarry Smith MatScalar p26, p27, p28, p29, p30, p31, p32, p33, p34, p35, p36; 2283287d42SBarry Smith MatScalar p37, p38, p39, p40, p41, p42, p43, p44, p45, p46, p47, p48, p49; 2383287d42SBarry Smith MatScalar x26, x27, x28, x29, x30, x31, x32, x33, x34, x35, x36; 2483287d42SBarry Smith MatScalar x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47, x48, x49; 2583287d42SBarry Smith MatScalar m26, m27, m28, m29, m30, m31, m32, m33, m34, m35, m36; 2683287d42SBarry Smith MatScalar m37, m38, m39, m40, m41, m42, m43, m44, m45, m46, m47, m48, m49; 2783287d42SBarry Smith MatScalar *ba = b->a, *aa = a->a; 28182b8fbaSHong Zhang PetscReal shift = info->shiftamount; 29a455e926SHong Zhang PetscBool allowzeropivot, zeropivotdetected; 3083287d42SBarry Smith 3183287d42SBarry Smith PetscFunctionBegin; 320164db54SHong Zhang allowzeropivot = PetscNot(A->erroriffailure); 339566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 349566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 359566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(49 * (n + 1), &rtmp)); 3683287d42SBarry Smith 3783287d42SBarry Smith for (i = 0; i < n; i++) { 3883287d42SBarry Smith nz = bi[i + 1] - bi[i]; 3983287d42SBarry Smith ajtmp = bj + bi[i]; 4083287d42SBarry Smith for (j = 0; j < nz; j++) { 4183287d42SBarry Smith x = rtmp + 49 * ajtmp[j]; 4283287d42SBarry Smith x[0] = x[1] = x[2] = x[3] = x[4] = x[5] = x[6] = x[7] = x[8] = x[9] = 0.0; 4383287d42SBarry Smith x[10] = x[11] = x[12] = x[13] = x[14] = x[15] = x[16] = x[17] = 0.0; 4483287d42SBarry Smith x[18] = x[19] = x[20] = x[21] = x[22] = x[23] = x[24] = x[25] = 0.0; 4583287d42SBarry Smith x[26] = x[27] = x[28] = x[29] = x[30] = x[31] = x[32] = x[33] = 0.0; 4683287d42SBarry Smith x[34] = x[35] = x[36] = x[37] = x[38] = x[39] = x[40] = x[41] = 0.0; 4783287d42SBarry Smith x[42] = x[43] = x[44] = x[45] = x[46] = x[47] = x[48] = 0.0; 4883287d42SBarry Smith } 4983287d42SBarry Smith /* load in initial (unfactored row) */ 5083287d42SBarry Smith idx = r[i]; 5183287d42SBarry Smith nz = ai[idx + 1] - ai[idx]; 5283287d42SBarry Smith ajtmpold = aj + ai[idx]; 5383287d42SBarry Smith v = aa + 49 * ai[idx]; 5483287d42SBarry Smith for (j = 0; j < nz; j++) { 5583287d42SBarry Smith x = rtmp + 49 * ic[ajtmpold[j]]; 569371c9d4SSatish Balay x[0] = v[0]; 579371c9d4SSatish Balay x[1] = v[1]; 589371c9d4SSatish Balay x[2] = v[2]; 599371c9d4SSatish Balay x[3] = v[3]; 609371c9d4SSatish Balay x[4] = v[4]; 619371c9d4SSatish Balay x[5] = v[5]; 629371c9d4SSatish Balay x[6] = v[6]; 639371c9d4SSatish Balay x[7] = v[7]; 649371c9d4SSatish Balay x[8] = v[8]; 659371c9d4SSatish Balay x[9] = v[9]; 669371c9d4SSatish Balay x[10] = v[10]; 679371c9d4SSatish Balay x[11] = v[11]; 689371c9d4SSatish Balay x[12] = v[12]; 699371c9d4SSatish Balay x[13] = v[13]; 709371c9d4SSatish Balay x[14] = v[14]; 719371c9d4SSatish Balay x[15] = v[15]; 729371c9d4SSatish Balay x[16] = v[16]; 739371c9d4SSatish Balay x[17] = v[17]; 749371c9d4SSatish Balay x[18] = v[18]; 759371c9d4SSatish Balay x[19] = v[19]; 769371c9d4SSatish Balay x[20] = v[20]; 779371c9d4SSatish Balay x[21] = v[21]; 789371c9d4SSatish Balay x[22] = v[22]; 799371c9d4SSatish Balay x[23] = v[23]; 809371c9d4SSatish Balay x[24] = v[24]; 819371c9d4SSatish Balay x[25] = v[25]; 829371c9d4SSatish Balay x[26] = v[26]; 839371c9d4SSatish Balay x[27] = v[27]; 849371c9d4SSatish Balay x[28] = v[28]; 859371c9d4SSatish Balay x[29] = v[29]; 869371c9d4SSatish Balay x[30] = v[30]; 879371c9d4SSatish Balay x[31] = v[31]; 889371c9d4SSatish Balay x[32] = v[32]; 899371c9d4SSatish Balay x[33] = v[33]; 909371c9d4SSatish Balay x[34] = v[34]; 919371c9d4SSatish Balay x[35] = v[35]; 929371c9d4SSatish Balay x[36] = v[36]; 939371c9d4SSatish Balay x[37] = v[37]; 949371c9d4SSatish Balay x[38] = v[38]; 959371c9d4SSatish Balay x[39] = v[39]; 969371c9d4SSatish Balay x[40] = v[40]; 979371c9d4SSatish Balay x[41] = v[41]; 989371c9d4SSatish Balay x[42] = v[42]; 999371c9d4SSatish Balay x[43] = v[43]; 1009371c9d4SSatish Balay x[44] = v[44]; 1019371c9d4SSatish Balay x[45] = v[45]; 1029371c9d4SSatish Balay x[46] = v[46]; 1039371c9d4SSatish Balay x[47] = v[47]; 10483287d42SBarry Smith x[48] = v[48]; 10583287d42SBarry Smith v += 49; 10683287d42SBarry Smith } 10783287d42SBarry Smith row = *ajtmp++; 10883287d42SBarry Smith while (row < i) { 10983287d42SBarry Smith pc = rtmp + 49 * row; 1109371c9d4SSatish Balay p1 = pc[0]; 1119371c9d4SSatish Balay p2 = pc[1]; 1129371c9d4SSatish Balay p3 = pc[2]; 1139371c9d4SSatish Balay p4 = pc[3]; 1149371c9d4SSatish Balay p5 = pc[4]; 1159371c9d4SSatish Balay p6 = pc[5]; 1169371c9d4SSatish Balay p7 = pc[6]; 1179371c9d4SSatish Balay p8 = pc[7]; 1189371c9d4SSatish Balay p9 = pc[8]; 1199371c9d4SSatish Balay p10 = pc[9]; 1209371c9d4SSatish Balay p11 = pc[10]; 1219371c9d4SSatish Balay p12 = pc[11]; 1229371c9d4SSatish Balay p13 = pc[12]; 1239371c9d4SSatish Balay p14 = pc[13]; 1249371c9d4SSatish Balay p15 = pc[14]; 1259371c9d4SSatish Balay p16 = pc[15]; 1269371c9d4SSatish Balay p17 = pc[16]; 1279371c9d4SSatish Balay p18 = pc[17]; 1289371c9d4SSatish Balay p19 = pc[18]; 1299371c9d4SSatish Balay p20 = pc[19]; 1309371c9d4SSatish Balay p21 = pc[20]; 1319371c9d4SSatish Balay p22 = pc[21]; 1329371c9d4SSatish Balay p23 = pc[22]; 1339371c9d4SSatish Balay p24 = pc[23]; 1349371c9d4SSatish Balay p25 = pc[24]; 1359371c9d4SSatish Balay p26 = pc[25]; 1369371c9d4SSatish Balay p27 = pc[26]; 1379371c9d4SSatish Balay p28 = pc[27]; 1389371c9d4SSatish Balay p29 = pc[28]; 1399371c9d4SSatish Balay p30 = pc[29]; 1409371c9d4SSatish Balay p31 = pc[30]; 1419371c9d4SSatish Balay p32 = pc[31]; 1429371c9d4SSatish Balay p33 = pc[32]; 1439371c9d4SSatish Balay p34 = pc[33]; 1449371c9d4SSatish Balay p35 = pc[34]; 1459371c9d4SSatish Balay p36 = pc[35]; 1469371c9d4SSatish Balay p37 = pc[36]; 1479371c9d4SSatish Balay p38 = pc[37]; 1489371c9d4SSatish Balay p39 = pc[38]; 1499371c9d4SSatish Balay p40 = pc[39]; 1509371c9d4SSatish Balay p41 = pc[40]; 1519371c9d4SSatish Balay p42 = pc[41]; 1529371c9d4SSatish Balay p43 = pc[42]; 1539371c9d4SSatish Balay p44 = pc[43]; 1549371c9d4SSatish Balay p45 = pc[44]; 1559371c9d4SSatish Balay p46 = pc[45]; 1569371c9d4SSatish Balay p47 = pc[46]; 1579371c9d4SSatish Balay p48 = pc[47]; 15883287d42SBarry Smith p49 = pc[48]; 1599371c9d4SSatish Balay if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0 || p5 != 0.0 || p6 != 0.0 || p7 != 0.0 || p8 != 0.0 || p9 != 0.0 || p10 != 0.0 || p11 != 0.0 || p12 != 0.0 || p13 != 0.0 || p14 != 0.0 || p15 != 0.0 || p16 != 0.0 || p17 != 0.0 || p18 != 0.0 || p19 != 0.0 || p20 != 0.0 || p21 != 0.0 || p22 != 0.0 || p23 != 0.0 || p24 != 0.0 || p25 != 0.0 || p26 != 0.0 || p27 != 0.0 || p28 != 0.0 || p29 != 0.0 || p30 != 0.0 || p31 != 0.0 || p32 != 0.0 || p33 != 0.0 || p34 != 0.0 || p35 != 0.0 || p36 != 0.0 || p37 != 0.0 || p38 != 0.0 || p39 != 0.0 || p40 != 0.0 || p41 != 0.0 || p42 != 0.0 || p43 != 0.0 || p44 != 0.0 || p45 != 0.0 || p46 != 0.0 || p47 != 0.0 || p48 != 0.0 || p49 != 0.0) { 16083287d42SBarry Smith pv = ba + 49 * diag_offset[row]; 16183287d42SBarry Smith pj = bj + diag_offset[row] + 1; 1629371c9d4SSatish Balay x1 = pv[0]; 1639371c9d4SSatish Balay x2 = pv[1]; 1649371c9d4SSatish Balay x3 = pv[2]; 1659371c9d4SSatish Balay x4 = pv[3]; 1669371c9d4SSatish Balay x5 = pv[4]; 1679371c9d4SSatish Balay x6 = pv[5]; 1689371c9d4SSatish Balay x7 = pv[6]; 1699371c9d4SSatish Balay x8 = pv[7]; 1709371c9d4SSatish Balay x9 = pv[8]; 1719371c9d4SSatish Balay x10 = pv[9]; 1729371c9d4SSatish Balay x11 = pv[10]; 1739371c9d4SSatish Balay x12 = pv[11]; 1749371c9d4SSatish Balay x13 = pv[12]; 1759371c9d4SSatish Balay x14 = pv[13]; 1769371c9d4SSatish Balay x15 = pv[14]; 1779371c9d4SSatish Balay x16 = pv[15]; 1789371c9d4SSatish Balay x17 = pv[16]; 1799371c9d4SSatish Balay x18 = pv[17]; 1809371c9d4SSatish Balay x19 = pv[18]; 1819371c9d4SSatish Balay x20 = pv[19]; 1829371c9d4SSatish Balay x21 = pv[20]; 1839371c9d4SSatish Balay x22 = pv[21]; 1849371c9d4SSatish Balay x23 = pv[22]; 1859371c9d4SSatish Balay x24 = pv[23]; 1869371c9d4SSatish Balay x25 = pv[24]; 1879371c9d4SSatish Balay x26 = pv[25]; 1889371c9d4SSatish Balay x27 = pv[26]; 1899371c9d4SSatish Balay x28 = pv[27]; 1909371c9d4SSatish Balay x29 = pv[28]; 1919371c9d4SSatish Balay x30 = pv[29]; 1929371c9d4SSatish Balay x31 = pv[30]; 1939371c9d4SSatish Balay x32 = pv[31]; 1949371c9d4SSatish Balay x33 = pv[32]; 1959371c9d4SSatish Balay x34 = pv[33]; 1969371c9d4SSatish Balay x35 = pv[34]; 1979371c9d4SSatish Balay x36 = pv[35]; 1989371c9d4SSatish Balay x37 = pv[36]; 1999371c9d4SSatish Balay x38 = pv[37]; 2009371c9d4SSatish Balay x39 = pv[38]; 2019371c9d4SSatish Balay x40 = pv[39]; 2029371c9d4SSatish Balay x41 = pv[40]; 2039371c9d4SSatish Balay x42 = pv[41]; 2049371c9d4SSatish Balay x43 = pv[42]; 2059371c9d4SSatish Balay x44 = pv[43]; 2069371c9d4SSatish Balay x45 = pv[44]; 2079371c9d4SSatish Balay x46 = pv[45]; 2089371c9d4SSatish Balay x47 = pv[46]; 2099371c9d4SSatish Balay x48 = pv[47]; 21083287d42SBarry Smith x49 = pv[48]; 21183287d42SBarry Smith pc[0] = m1 = p1 * x1 + p8 * x2 + p15 * x3 + p22 * x4 + p29 * x5 + p36 * x6 + p43 * x7; 21283287d42SBarry Smith pc[1] = m2 = p2 * x1 + p9 * x2 + p16 * x3 + p23 * x4 + p30 * x5 + p37 * x6 + p44 * x7; 21383287d42SBarry Smith pc[2] = m3 = p3 * x1 + p10 * x2 + p17 * x3 + p24 * x4 + p31 * x5 + p38 * x6 + p45 * x7; 21483287d42SBarry Smith pc[3] = m4 = p4 * x1 + p11 * x2 + p18 * x3 + p25 * x4 + p32 * x5 + p39 * x6 + p46 * x7; 21583287d42SBarry Smith pc[4] = m5 = p5 * x1 + p12 * x2 + p19 * x3 + p26 * x4 + p33 * x5 + p40 * x6 + p47 * x7; 21683287d42SBarry Smith pc[5] = m6 = p6 * x1 + p13 * x2 + p20 * x3 + p27 * x4 + p34 * x5 + p41 * x6 + p48 * x7; 21783287d42SBarry Smith pc[6] = m7 = p7 * x1 + p14 * x2 + p21 * x3 + p28 * x4 + p35 * x5 + p42 * x6 + p49 * x7; 21883287d42SBarry Smith 21983287d42SBarry Smith pc[7] = m8 = p1 * x8 + p8 * x9 + p15 * x10 + p22 * x11 + p29 * x12 + p36 * x13 + p43 * x14; 22083287d42SBarry Smith pc[8] = m9 = p2 * x8 + p9 * x9 + p16 * x10 + p23 * x11 + p30 * x12 + p37 * x13 + p44 * x14; 22183287d42SBarry Smith pc[9] = m10 = p3 * x8 + p10 * x9 + p17 * x10 + p24 * x11 + p31 * x12 + p38 * x13 + p45 * x14; 22283287d42SBarry Smith pc[10] = m11 = p4 * x8 + p11 * x9 + p18 * x10 + p25 * x11 + p32 * x12 + p39 * x13 + p46 * x14; 22383287d42SBarry Smith pc[11] = m12 = p5 * x8 + p12 * x9 + p19 * x10 + p26 * x11 + p33 * x12 + p40 * x13 + p47 * x14; 22483287d42SBarry Smith pc[12] = m13 = p6 * x8 + p13 * x9 + p20 * x10 + p27 * x11 + p34 * x12 + p41 * x13 + p48 * x14; 22583287d42SBarry Smith pc[13] = m14 = p7 * x8 + p14 * x9 + p21 * x10 + p28 * x11 + p35 * x12 + p42 * x13 + p49 * x14; 22683287d42SBarry Smith 22783287d42SBarry Smith pc[14] = m15 = p1 * x15 + p8 * x16 + p15 * x17 + p22 * x18 + p29 * x19 + p36 * x20 + p43 * x21; 22883287d42SBarry Smith pc[15] = m16 = p2 * x15 + p9 * x16 + p16 * x17 + p23 * x18 + p30 * x19 + p37 * x20 + p44 * x21; 22983287d42SBarry Smith pc[16] = m17 = p3 * x15 + p10 * x16 + p17 * x17 + p24 * x18 + p31 * x19 + p38 * x20 + p45 * x21; 23083287d42SBarry Smith pc[17] = m18 = p4 * x15 + p11 * x16 + p18 * x17 + p25 * x18 + p32 * x19 + p39 * x20 + p46 * x21; 23183287d42SBarry Smith pc[18] = m19 = p5 * x15 + p12 * x16 + p19 * x17 + p26 * x18 + p33 * x19 + p40 * x20 + p47 * x21; 23283287d42SBarry Smith pc[19] = m20 = p6 * x15 + p13 * x16 + p20 * x17 + p27 * x18 + p34 * x19 + p41 * x20 + p48 * x21; 23383287d42SBarry Smith pc[20] = m21 = p7 * x15 + p14 * x16 + p21 * x17 + p28 * x18 + p35 * x19 + p42 * x20 + p49 * x21; 23483287d42SBarry Smith 23583287d42SBarry Smith pc[21] = m22 = p1 * x22 + p8 * x23 + p15 * x24 + p22 * x25 + p29 * x26 + p36 * x27 + p43 * x28; 23683287d42SBarry Smith pc[22] = m23 = p2 * x22 + p9 * x23 + p16 * x24 + p23 * x25 + p30 * x26 + p37 * x27 + p44 * x28; 23783287d42SBarry Smith pc[23] = m24 = p3 * x22 + p10 * x23 + p17 * x24 + p24 * x25 + p31 * x26 + p38 * x27 + p45 * x28; 23883287d42SBarry Smith pc[24] = m25 = p4 * x22 + p11 * x23 + p18 * x24 + p25 * x25 + p32 * x26 + p39 * x27 + p46 * x28; 23983287d42SBarry Smith pc[25] = m26 = p5 * x22 + p12 * x23 + p19 * x24 + p26 * x25 + p33 * x26 + p40 * x27 + p47 * x28; 24083287d42SBarry Smith pc[26] = m27 = p6 * x22 + p13 * x23 + p20 * x24 + p27 * x25 + p34 * x26 + p41 * x27 + p48 * x28; 24183287d42SBarry Smith pc[27] = m28 = p7 * x22 + p14 * x23 + p21 * x24 + p28 * x25 + p35 * x26 + p42 * x27 + p49 * x28; 24283287d42SBarry Smith 24383287d42SBarry Smith pc[28] = m29 = p1 * x29 + p8 * x30 + p15 * x31 + p22 * x32 + p29 * x33 + p36 * x34 + p43 * x35; 24483287d42SBarry Smith pc[29] = m30 = p2 * x29 + p9 * x30 + p16 * x31 + p23 * x32 + p30 * x33 + p37 * x34 + p44 * x35; 24583287d42SBarry Smith pc[30] = m31 = p3 * x29 + p10 * x30 + p17 * x31 + p24 * x32 + p31 * x33 + p38 * x34 + p45 * x35; 24683287d42SBarry Smith pc[31] = m32 = p4 * x29 + p11 * x30 + p18 * x31 + p25 * x32 + p32 * x33 + p39 * x34 + p46 * x35; 24783287d42SBarry Smith pc[32] = m33 = p5 * x29 + p12 * x30 + p19 * x31 + p26 * x32 + p33 * x33 + p40 * x34 + p47 * x35; 24883287d42SBarry Smith pc[33] = m34 = p6 * x29 + p13 * x30 + p20 * x31 + p27 * x32 + p34 * x33 + p41 * x34 + p48 * x35; 24983287d42SBarry Smith pc[34] = m35 = p7 * x29 + p14 * x30 + p21 * x31 + p28 * x32 + p35 * x33 + p42 * x34 + p49 * x35; 25083287d42SBarry Smith 25183287d42SBarry Smith pc[35] = m36 = p1 * x36 + p8 * x37 + p15 * x38 + p22 * x39 + p29 * x40 + p36 * x41 + p43 * x42; 25283287d42SBarry Smith pc[36] = m37 = p2 * x36 + p9 * x37 + p16 * x38 + p23 * x39 + p30 * x40 + p37 * x41 + p44 * x42; 25383287d42SBarry Smith pc[37] = m38 = p3 * x36 + p10 * x37 + p17 * x38 + p24 * x39 + p31 * x40 + p38 * x41 + p45 * x42; 25483287d42SBarry Smith pc[38] = m39 = p4 * x36 + p11 * x37 + p18 * x38 + p25 * x39 + p32 * x40 + p39 * x41 + p46 * x42; 25583287d42SBarry Smith pc[39] = m40 = p5 * x36 + p12 * x37 + p19 * x38 + p26 * x39 + p33 * x40 + p40 * x41 + p47 * x42; 25683287d42SBarry Smith pc[40] = m41 = p6 * x36 + p13 * x37 + p20 * x38 + p27 * x39 + p34 * x40 + p41 * x41 + p48 * x42; 25783287d42SBarry Smith pc[41] = m42 = p7 * x36 + p14 * x37 + p21 * x38 + p28 * x39 + p35 * x40 + p42 * x41 + p49 * x42; 25883287d42SBarry Smith 25983287d42SBarry Smith pc[42] = m43 = p1 * x43 + p8 * x44 + p15 * x45 + p22 * x46 + p29 * x47 + p36 * x48 + p43 * x49; 26083287d42SBarry Smith pc[43] = m44 = p2 * x43 + p9 * x44 + p16 * x45 + p23 * x46 + p30 * x47 + p37 * x48 + p44 * x49; 26183287d42SBarry Smith pc[44] = m45 = p3 * x43 + p10 * x44 + p17 * x45 + p24 * x46 + p31 * x47 + p38 * x48 + p45 * x49; 26283287d42SBarry Smith pc[45] = m46 = p4 * x43 + p11 * x44 + p18 * x45 + p25 * x46 + p32 * x47 + p39 * x48 + p46 * x49; 26383287d42SBarry Smith pc[46] = m47 = p5 * x43 + p12 * x44 + p19 * x45 + p26 * x46 + p33 * x47 + p40 * x48 + p47 * x49; 26483287d42SBarry Smith pc[47] = m48 = p6 * x43 + p13 * x44 + p20 * x45 + p27 * x46 + p34 * x47 + p41 * x48 + p48 * x49; 26583287d42SBarry Smith pc[48] = m49 = p7 * x43 + p14 * x44 + p21 * x45 + p28 * x46 + p35 * x47 + p42 * x48 + p49 * x49; 26683287d42SBarry Smith 26783287d42SBarry Smith nz = bi[row + 1] - diag_offset[row] - 1; 26883287d42SBarry Smith pv += 49; 26983287d42SBarry Smith for (j = 0; j < nz; j++) { 2709371c9d4SSatish Balay x1 = pv[0]; 2719371c9d4SSatish Balay x2 = pv[1]; 2729371c9d4SSatish Balay x3 = pv[2]; 2739371c9d4SSatish Balay x4 = pv[3]; 2749371c9d4SSatish Balay x5 = pv[4]; 2759371c9d4SSatish Balay x6 = pv[5]; 2769371c9d4SSatish Balay x7 = pv[6]; 2779371c9d4SSatish Balay x8 = pv[7]; 2789371c9d4SSatish Balay x9 = pv[8]; 2799371c9d4SSatish Balay x10 = pv[9]; 2809371c9d4SSatish Balay x11 = pv[10]; 2819371c9d4SSatish Balay x12 = pv[11]; 2829371c9d4SSatish Balay x13 = pv[12]; 2839371c9d4SSatish Balay x14 = pv[13]; 2849371c9d4SSatish Balay x15 = pv[14]; 2859371c9d4SSatish Balay x16 = pv[15]; 2869371c9d4SSatish Balay x17 = pv[16]; 2879371c9d4SSatish Balay x18 = pv[17]; 2889371c9d4SSatish Balay x19 = pv[18]; 2899371c9d4SSatish Balay x20 = pv[19]; 2909371c9d4SSatish Balay x21 = pv[20]; 2919371c9d4SSatish Balay x22 = pv[21]; 2929371c9d4SSatish Balay x23 = pv[22]; 2939371c9d4SSatish Balay x24 = pv[23]; 2949371c9d4SSatish Balay x25 = pv[24]; 2959371c9d4SSatish Balay x26 = pv[25]; 2969371c9d4SSatish Balay x27 = pv[26]; 2979371c9d4SSatish Balay x28 = pv[27]; 2989371c9d4SSatish Balay x29 = pv[28]; 2999371c9d4SSatish Balay x30 = pv[29]; 3009371c9d4SSatish Balay x31 = pv[30]; 3019371c9d4SSatish Balay x32 = pv[31]; 3029371c9d4SSatish Balay x33 = pv[32]; 3039371c9d4SSatish Balay x34 = pv[33]; 3049371c9d4SSatish Balay x35 = pv[34]; 3059371c9d4SSatish Balay x36 = pv[35]; 3069371c9d4SSatish Balay x37 = pv[36]; 3079371c9d4SSatish Balay x38 = pv[37]; 3089371c9d4SSatish Balay x39 = pv[38]; 3099371c9d4SSatish Balay x40 = pv[39]; 3109371c9d4SSatish Balay x41 = pv[40]; 3119371c9d4SSatish Balay x42 = pv[41]; 3129371c9d4SSatish Balay x43 = pv[42]; 3139371c9d4SSatish Balay x44 = pv[43]; 3149371c9d4SSatish Balay x45 = pv[44]; 3159371c9d4SSatish Balay x46 = pv[45]; 3169371c9d4SSatish Balay x47 = pv[46]; 3179371c9d4SSatish Balay x48 = pv[47]; 31883287d42SBarry Smith x49 = pv[48]; 31983287d42SBarry Smith x = rtmp + 49 * pj[j]; 32083287d42SBarry Smith x[0] -= m1 * x1 + m8 * x2 + m15 * x3 + m22 * x4 + m29 * x5 + m36 * x6 + m43 * x7; 32183287d42SBarry Smith x[1] -= m2 * x1 + m9 * x2 + m16 * x3 + m23 * x4 + m30 * x5 + m37 * x6 + m44 * x7; 32283287d42SBarry Smith x[2] -= m3 * x1 + m10 * x2 + m17 * x3 + m24 * x4 + m31 * x5 + m38 * x6 + m45 * x7; 32383287d42SBarry Smith x[3] -= m4 * x1 + m11 * x2 + m18 * x3 + m25 * x4 + m32 * x5 + m39 * x6 + m46 * x7; 32483287d42SBarry Smith x[4] -= m5 * x1 + m12 * x2 + m19 * x3 + m26 * x4 + m33 * x5 + m40 * x6 + m47 * x7; 32583287d42SBarry Smith x[5] -= m6 * x1 + m13 * x2 + m20 * x3 + m27 * x4 + m34 * x5 + m41 * x6 + m48 * x7; 32683287d42SBarry Smith x[6] -= m7 * x1 + m14 * x2 + m21 * x3 + m28 * x4 + m35 * x5 + m42 * x6 + m49 * x7; 32783287d42SBarry Smith 32883287d42SBarry Smith x[7] -= m1 * x8 + m8 * x9 + m15 * x10 + m22 * x11 + m29 * x12 + m36 * x13 + m43 * x14; 32983287d42SBarry Smith x[8] -= m2 * x8 + m9 * x9 + m16 * x10 + m23 * x11 + m30 * x12 + m37 * x13 + m44 * x14; 33083287d42SBarry Smith x[9] -= m3 * x8 + m10 * x9 + m17 * x10 + m24 * x11 + m31 * x12 + m38 * x13 + m45 * x14; 33183287d42SBarry Smith x[10] -= m4 * x8 + m11 * x9 + m18 * x10 + m25 * x11 + m32 * x12 + m39 * x13 + m46 * x14; 33283287d42SBarry Smith x[11] -= m5 * x8 + m12 * x9 + m19 * x10 + m26 * x11 + m33 * x12 + m40 * x13 + m47 * x14; 33383287d42SBarry Smith x[12] -= m6 * x8 + m13 * x9 + m20 * x10 + m27 * x11 + m34 * x12 + m41 * x13 + m48 * x14; 33483287d42SBarry Smith x[13] -= m7 * x8 + m14 * x9 + m21 * x10 + m28 * x11 + m35 * x12 + m42 * x13 + m49 * x14; 33583287d42SBarry Smith 33683287d42SBarry Smith x[14] -= m1 * x15 + m8 * x16 + m15 * x17 + m22 * x18 + m29 * x19 + m36 * x20 + m43 * x21; 33783287d42SBarry Smith x[15] -= m2 * x15 + m9 * x16 + m16 * x17 + m23 * x18 + m30 * x19 + m37 * x20 + m44 * x21; 33883287d42SBarry Smith x[16] -= m3 * x15 + m10 * x16 + m17 * x17 + m24 * x18 + m31 * x19 + m38 * x20 + m45 * x21; 33983287d42SBarry Smith x[17] -= m4 * x15 + m11 * x16 + m18 * x17 + m25 * x18 + m32 * x19 + m39 * x20 + m46 * x21; 34083287d42SBarry Smith x[18] -= m5 * x15 + m12 * x16 + m19 * x17 + m26 * x18 + m33 * x19 + m40 * x20 + m47 * x21; 34183287d42SBarry Smith x[19] -= m6 * x15 + m13 * x16 + m20 * x17 + m27 * x18 + m34 * x19 + m41 * x20 + m48 * x21; 34283287d42SBarry Smith x[20] -= m7 * x15 + m14 * x16 + m21 * x17 + m28 * x18 + m35 * x19 + m42 * x20 + m49 * x21; 34383287d42SBarry Smith 34483287d42SBarry Smith x[21] -= m1 * x22 + m8 * x23 + m15 * x24 + m22 * x25 + m29 * x26 + m36 * x27 + m43 * x28; 34583287d42SBarry Smith x[22] -= m2 * x22 + m9 * x23 + m16 * x24 + m23 * x25 + m30 * x26 + m37 * x27 + m44 * x28; 34683287d42SBarry Smith x[23] -= m3 * x22 + m10 * x23 + m17 * x24 + m24 * x25 + m31 * x26 + m38 * x27 + m45 * x28; 34783287d42SBarry Smith x[24] -= m4 * x22 + m11 * x23 + m18 * x24 + m25 * x25 + m32 * x26 + m39 * x27 + m46 * x28; 34883287d42SBarry Smith x[25] -= m5 * x22 + m12 * x23 + m19 * x24 + m26 * x25 + m33 * x26 + m40 * x27 + m47 * x28; 34983287d42SBarry Smith x[26] -= m6 * x22 + m13 * x23 + m20 * x24 + m27 * x25 + m34 * x26 + m41 * x27 + m48 * x28; 35083287d42SBarry Smith x[27] -= m7 * x22 + m14 * x23 + m21 * x24 + m28 * x25 + m35 * x26 + m42 * x27 + m49 * x28; 35183287d42SBarry Smith 35283287d42SBarry Smith x[28] -= m1 * x29 + m8 * x30 + m15 * x31 + m22 * x32 + m29 * x33 + m36 * x34 + m43 * x35; 35383287d42SBarry Smith x[29] -= m2 * x29 + m9 * x30 + m16 * x31 + m23 * x32 + m30 * x33 + m37 * x34 + m44 * x35; 35483287d42SBarry Smith x[30] -= m3 * x29 + m10 * x30 + m17 * x31 + m24 * x32 + m31 * x33 + m38 * x34 + m45 * x35; 35583287d42SBarry Smith x[31] -= m4 * x29 + m11 * x30 + m18 * x31 + m25 * x32 + m32 * x33 + m39 * x34 + m46 * x35; 35683287d42SBarry Smith x[32] -= m5 * x29 + m12 * x30 + m19 * x31 + m26 * x32 + m33 * x33 + m40 * x34 + m47 * x35; 35783287d42SBarry Smith x[33] -= m6 * x29 + m13 * x30 + m20 * x31 + m27 * x32 + m34 * x33 + m41 * x34 + m48 * x35; 35883287d42SBarry Smith x[34] -= m7 * x29 + m14 * x30 + m21 * x31 + m28 * x32 + m35 * x33 + m42 * x34 + m49 * x35; 35983287d42SBarry Smith 36083287d42SBarry Smith x[35] -= m1 * x36 + m8 * x37 + m15 * x38 + m22 * x39 + m29 * x40 + m36 * x41 + m43 * x42; 36183287d42SBarry Smith x[36] -= m2 * x36 + m9 * x37 + m16 * x38 + m23 * x39 + m30 * x40 + m37 * x41 + m44 * x42; 36283287d42SBarry Smith x[37] -= m3 * x36 + m10 * x37 + m17 * x38 + m24 * x39 + m31 * x40 + m38 * x41 + m45 * x42; 36383287d42SBarry Smith x[38] -= m4 * x36 + m11 * x37 + m18 * x38 + m25 * x39 + m32 * x40 + m39 * x41 + m46 * x42; 36483287d42SBarry Smith x[39] -= m5 * x36 + m12 * x37 + m19 * x38 + m26 * x39 + m33 * x40 + m40 * x41 + m47 * x42; 36583287d42SBarry Smith x[40] -= m6 * x36 + m13 * x37 + m20 * x38 + m27 * x39 + m34 * x40 + m41 * x41 + m48 * x42; 36683287d42SBarry Smith x[41] -= m7 * x36 + m14 * x37 + m21 * x38 + m28 * x39 + m35 * x40 + m42 * x41 + m49 * x42; 36783287d42SBarry Smith 36883287d42SBarry Smith x[42] -= m1 * x43 + m8 * x44 + m15 * x45 + m22 * x46 + m29 * x47 + m36 * x48 + m43 * x49; 36983287d42SBarry Smith x[43] -= m2 * x43 + m9 * x44 + m16 * x45 + m23 * x46 + m30 * x47 + m37 * x48 + m44 * x49; 37083287d42SBarry Smith x[44] -= m3 * x43 + m10 * x44 + m17 * x45 + m24 * x46 + m31 * x47 + m38 * x48 + m45 * x49; 37183287d42SBarry Smith x[45] -= m4 * x43 + m11 * x44 + m18 * x45 + m25 * x46 + m32 * x47 + m39 * x48 + m46 * x49; 37283287d42SBarry Smith x[46] -= m5 * x43 + m12 * x44 + m19 * x45 + m26 * x46 + m33 * x47 + m40 * x48 + m47 * x49; 37383287d42SBarry Smith x[47] -= m6 * x43 + m13 * x44 + m20 * x45 + m27 * x46 + m34 * x47 + m41 * x48 + m48 * x49; 37483287d42SBarry Smith x[48] -= m7 * x43 + m14 * x44 + m21 * x45 + m28 * x46 + m35 * x47 + m42 * x48 + m49 * x49; 37583287d42SBarry Smith pv += 49; 37683287d42SBarry Smith } 3779566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(686.0 * nz + 637.0)); 37883287d42SBarry Smith } 37983287d42SBarry Smith row = *ajtmp++; 38083287d42SBarry Smith } 38183287d42SBarry Smith /* finished row so stick it into b->a */ 38283287d42SBarry Smith pv = ba + 49 * bi[i]; 38383287d42SBarry Smith pj = bj + bi[i]; 38483287d42SBarry Smith nz = bi[i + 1] - bi[i]; 38583287d42SBarry Smith for (j = 0; j < nz; j++) { 38683287d42SBarry Smith x = rtmp + 49 * pj[j]; 3879371c9d4SSatish Balay pv[0] = x[0]; 3889371c9d4SSatish Balay pv[1] = x[1]; 3899371c9d4SSatish Balay pv[2] = x[2]; 3909371c9d4SSatish Balay pv[3] = x[3]; 3919371c9d4SSatish Balay pv[4] = x[4]; 3929371c9d4SSatish Balay pv[5] = x[5]; 3939371c9d4SSatish Balay pv[6] = x[6]; 3949371c9d4SSatish Balay pv[7] = x[7]; 3959371c9d4SSatish Balay pv[8] = x[8]; 3969371c9d4SSatish Balay pv[9] = x[9]; 3979371c9d4SSatish Balay pv[10] = x[10]; 3989371c9d4SSatish Balay pv[11] = x[11]; 3999371c9d4SSatish Balay pv[12] = x[12]; 4009371c9d4SSatish Balay pv[13] = x[13]; 4019371c9d4SSatish Balay pv[14] = x[14]; 4029371c9d4SSatish Balay pv[15] = x[15]; 4039371c9d4SSatish Balay pv[16] = x[16]; 4049371c9d4SSatish Balay pv[17] = x[17]; 4059371c9d4SSatish Balay pv[18] = x[18]; 4069371c9d4SSatish Balay pv[19] = x[19]; 4079371c9d4SSatish Balay pv[20] = x[20]; 4089371c9d4SSatish Balay pv[21] = x[21]; 4099371c9d4SSatish Balay pv[22] = x[22]; 4109371c9d4SSatish Balay pv[23] = x[23]; 4119371c9d4SSatish Balay pv[24] = x[24]; 4129371c9d4SSatish Balay pv[25] = x[25]; 4139371c9d4SSatish Balay pv[26] = x[26]; 4149371c9d4SSatish Balay pv[27] = x[27]; 4159371c9d4SSatish Balay pv[28] = x[28]; 4169371c9d4SSatish Balay pv[29] = x[29]; 4179371c9d4SSatish Balay pv[30] = x[30]; 4189371c9d4SSatish Balay pv[31] = x[31]; 4199371c9d4SSatish Balay pv[32] = x[32]; 4209371c9d4SSatish Balay pv[33] = x[33]; 4219371c9d4SSatish Balay pv[34] = x[34]; 4229371c9d4SSatish Balay pv[35] = x[35]; 4239371c9d4SSatish Balay pv[36] = x[36]; 4249371c9d4SSatish Balay pv[37] = x[37]; 4259371c9d4SSatish Balay pv[38] = x[38]; 4269371c9d4SSatish Balay pv[39] = x[39]; 4279371c9d4SSatish Balay pv[40] = x[40]; 4289371c9d4SSatish Balay pv[41] = x[41]; 4299371c9d4SSatish Balay pv[42] = x[42]; 4309371c9d4SSatish Balay pv[43] = x[43]; 4319371c9d4SSatish Balay pv[44] = x[44]; 4329371c9d4SSatish Balay pv[45] = x[45]; 4339371c9d4SSatish Balay pv[46] = x[46]; 4349371c9d4SSatish Balay pv[47] = x[47]; 43583287d42SBarry Smith pv[48] = x[48]; 43683287d42SBarry Smith pv += 49; 43783287d42SBarry Smith } 43883287d42SBarry Smith /* invert diagonal block */ 43983287d42SBarry Smith w = ba + 49 * diag_offset[i]; 4409566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_7(w, shift, allowzeropivot, &zeropivotdetected)); 4417b6c816cSBarry Smith if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 44283287d42SBarry Smith } 44383287d42SBarry Smith 4449566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 4459566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 4469566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 44726fbe8dcSKarl Rupp 44806e38f1dSHong Zhang C->ops->solve = MatSolve_SeqBAIJ_7_inplace; 44906e38f1dSHong Zhang C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7_inplace; 45083287d42SBarry Smith C->assembled = PETSC_TRUE; 45126fbe8dcSKarl Rupp 4529566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.333333333333 * 7 * 7 * 7 * b->mbs)); /* from inverting diagonal blocks */ 45383287d42SBarry Smith PetscFunctionReturn(0); 45483287d42SBarry Smith } 455bef36659SShri Abhyankar 4569371c9d4SSatish Balay PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7(Mat B, Mat A, const MatFactorInfo *info) { 457bef36659SShri Abhyankar Mat C = B; 458bef36659SShri Abhyankar Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data; 459bef36659SShri Abhyankar IS isrow = b->row, isicol = b->icol; 4605a586d82SBarry Smith const PetscInt *r, *ic; 461bbd65245SShri Abhyankar PetscInt i, j, k, nz, nzL, row; 462bbd65245SShri Abhyankar const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 463bbd65245SShri Abhyankar const PetscInt *ajtmp, *bjtmp, *bdiag = b->diag, *pj, bs2 = a->bs2; 464bef36659SShri Abhyankar MatScalar *rtmp, *pc, *mwork, *v, *pv, *aa = a->a; 465bbd65245SShri Abhyankar PetscInt flg; 466182b8fbaSHong Zhang PetscReal shift = info->shiftamount; 467a455e926SHong Zhang PetscBool allowzeropivot, zeropivotdetected; 468bef36659SShri Abhyankar 469bef36659SShri Abhyankar PetscFunctionBegin; 4700164db54SHong Zhang allowzeropivot = PetscNot(A->erroriffailure); 4719566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 4729566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 473bef36659SShri Abhyankar 474bef36659SShri Abhyankar /* generate work space needed by the factorization */ 4759566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs2 * n, &rtmp, bs2, &mwork)); 4769566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(rtmp, bs2 * n)); 477bef36659SShri Abhyankar 478bef36659SShri Abhyankar for (i = 0; i < n; i++) { 479bef36659SShri Abhyankar /* zero rtmp */ 480bef36659SShri Abhyankar /* L part */ 481bef36659SShri Abhyankar nz = bi[i + 1] - bi[i]; 482bef36659SShri Abhyankar bjtmp = bj + bi[i]; 483*48a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2)); 484bef36659SShri Abhyankar 485bef36659SShri Abhyankar /* U part */ 48635aa4fcfSShri Abhyankar nz = bdiag[i] - bdiag[i + 1]; 48735aa4fcfSShri Abhyankar bjtmp = bj + bdiag[i + 1] + 1; 488*48a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2)); 48935aa4fcfSShri Abhyankar 49035aa4fcfSShri Abhyankar /* load in initial (unfactored row) */ 49135aa4fcfSShri Abhyankar nz = ai[r[i] + 1] - ai[r[i]]; 49235aa4fcfSShri Abhyankar ajtmp = aj + ai[r[i]]; 49335aa4fcfSShri Abhyankar v = aa + bs2 * ai[r[i]]; 494*48a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(rtmp + bs2 * ic[ajtmp[j]], v + bs2 * j, bs2)); 49535aa4fcfSShri Abhyankar 49635aa4fcfSShri Abhyankar /* elimination */ 49735aa4fcfSShri Abhyankar bjtmp = bj + bi[i]; 49835aa4fcfSShri Abhyankar nzL = bi[i + 1] - bi[i]; 49935aa4fcfSShri Abhyankar for (k = 0; k < nzL; k++) { 50035aa4fcfSShri Abhyankar row = bjtmp[k]; 50135aa4fcfSShri Abhyankar pc = rtmp + bs2 * row; 502c35f09e5SBarry Smith for (flg = 0, j = 0; j < bs2; j++) { 503c35f09e5SBarry Smith if (pc[j] != 0.0) { 504c35f09e5SBarry Smith flg = 1; 505c35f09e5SBarry Smith break; 506c35f09e5SBarry Smith } 507c35f09e5SBarry Smith } 50835aa4fcfSShri Abhyankar if (flg) { 50935aa4fcfSShri Abhyankar pv = b->a + bs2 * bdiag[row]; 51096b95a6bSBarry Smith /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */ 5119566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_A_times_B_7(pc, pv, mwork)); 51235aa4fcfSShri Abhyankar 513a5b23f4aSJose E. Roman pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 51435aa4fcfSShri Abhyankar pv = b->a + bs2 * (bdiag[row + 1] + 1); 51535aa4fcfSShri Abhyankar nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries inU(row,:), excluding diag */ 51635aa4fcfSShri Abhyankar for (j = 0; j < nz; j++) { 51796b95a6bSBarry Smith /* PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */ 51835aa4fcfSShri Abhyankar /* rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */ 51935aa4fcfSShri Abhyankar v = rtmp + bs2 * pj[j]; 5209566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_A_minus_B_times_C_7(v, pc, pv)); 52135aa4fcfSShri Abhyankar pv += bs2; 52235aa4fcfSShri Abhyankar } 5239566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(686.0 * nz + 637)); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ 52435aa4fcfSShri Abhyankar } 52535aa4fcfSShri Abhyankar } 52635aa4fcfSShri Abhyankar 52735aa4fcfSShri Abhyankar /* finished row so stick it into b->a */ 52835aa4fcfSShri Abhyankar /* L part */ 52935aa4fcfSShri Abhyankar pv = b->a + bs2 * bi[i]; 53035aa4fcfSShri Abhyankar pj = b->j + bi[i]; 53135aa4fcfSShri Abhyankar nz = bi[i + 1] - bi[i]; 532*48a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2)); 53335aa4fcfSShri Abhyankar 534a5b23f4aSJose E. Roman /* Mark diagonal and invert diagonal for simpler triangular solves */ 53535aa4fcfSShri Abhyankar pv = b->a + bs2 * bdiag[i]; 53635aa4fcfSShri Abhyankar pj = b->j + bdiag[i]; 5379566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(pv, rtmp + bs2 * pj[0], bs2)); 5389566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_7(pv, shift, allowzeropivot, &zeropivotdetected)); 5397b6c816cSBarry Smith if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 54035aa4fcfSShri Abhyankar 54135aa4fcfSShri Abhyankar /* U part */ 54235aa4fcfSShri Abhyankar pv = b->a + bs2 * (bdiag[i + 1] + 1); 54335aa4fcfSShri Abhyankar pj = b->j + bdiag[i + 1] + 1; 54435aa4fcfSShri Abhyankar nz = bdiag[i] - bdiag[i + 1] - 1; 545*48a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2)); 54635aa4fcfSShri Abhyankar } 54735aa4fcfSShri Abhyankar 5489566063dSJacob Faibussowitsch PetscCall(PetscFree2(rtmp, mwork)); 5499566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 5509566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 55126fbe8dcSKarl Rupp 5524dd39f65SShri Abhyankar C->ops->solve = MatSolve_SeqBAIJ_7; 5534dd39f65SShri Abhyankar C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7; 55435aa4fcfSShri Abhyankar C->assembled = PETSC_TRUE; 55526fbe8dcSKarl Rupp 5569566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.333333333333 * 7 * 7 * 7 * n)); /* from inverting diagonal blocks */ 55735aa4fcfSShri Abhyankar PetscFunctionReturn(0); 55835aa4fcfSShri Abhyankar } 55935aa4fcfSShri Abhyankar 5609371c9d4SSatish Balay PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info) { 561bef36659SShri Abhyankar Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data; 562bef36659SShri Abhyankar PetscInt i, j, n = a->mbs, *bi = b->i, *bj = b->j; 563bef36659SShri Abhyankar PetscInt *ajtmpold, *ajtmp, nz, row; 564bef36659SShri Abhyankar PetscInt *diag_offset = b->diag, *ai = a->i, *aj = a->j, *pj; 565bef36659SShri Abhyankar MatScalar *pv, *v, *rtmp, *pc, *w, *x; 566bef36659SShri Abhyankar MatScalar x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; 567bef36659SShri Abhyankar MatScalar x16, x17, x18, x19, x20, x21, x22, x23, x24, x25; 568bef36659SShri Abhyankar MatScalar p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15; 569bef36659SShri Abhyankar MatScalar p16, p17, p18, p19, p20, p21, p22, p23, p24, p25; 570bef36659SShri Abhyankar MatScalar m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14, m15; 571bef36659SShri Abhyankar MatScalar m16, m17, m18, m19, m20, m21, m22, m23, m24, m25; 572bef36659SShri Abhyankar MatScalar p26, p27, p28, p29, p30, p31, p32, p33, p34, p35, p36; 573bef36659SShri Abhyankar MatScalar p37, p38, p39, p40, p41, p42, p43, p44, p45, p46, p47, p48, p49; 574bef36659SShri Abhyankar MatScalar x26, x27, x28, x29, x30, x31, x32, x33, x34, x35, x36; 575bef36659SShri Abhyankar MatScalar x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47, x48, x49; 576bef36659SShri Abhyankar MatScalar m26, m27, m28, m29, m30, m31, m32, m33, m34, m35, m36; 577bef36659SShri Abhyankar MatScalar m37, m38, m39, m40, m41, m42, m43, m44, m45, m46, m47, m48, m49; 578bef36659SShri Abhyankar MatScalar *ba = b->a, *aa = a->a; 579182b8fbaSHong Zhang PetscReal shift = info->shiftamount; 580a455e926SHong Zhang PetscBool allowzeropivot, zeropivotdetected; 581bef36659SShri Abhyankar 582bef36659SShri Abhyankar PetscFunctionBegin; 5830164db54SHong Zhang allowzeropivot = PetscNot(A->erroriffailure); 5849566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(49 * (n + 1), &rtmp)); 585bef36659SShri Abhyankar for (i = 0; i < n; i++) { 586bef36659SShri Abhyankar nz = bi[i + 1] - bi[i]; 587bef36659SShri Abhyankar ajtmp = bj + bi[i]; 588bef36659SShri Abhyankar for (j = 0; j < nz; j++) { 589bef36659SShri Abhyankar x = rtmp + 49 * ajtmp[j]; 590bef36659SShri Abhyankar x[0] = x[1] = x[2] = x[3] = x[4] = x[5] = x[6] = x[7] = x[8] = x[9] = 0.0; 591bef36659SShri Abhyankar x[10] = x[11] = x[12] = x[13] = x[14] = x[15] = x[16] = x[17] = 0.0; 592bef36659SShri Abhyankar x[18] = x[19] = x[20] = x[21] = x[22] = x[23] = x[24] = x[25] = 0.0; 593bef36659SShri Abhyankar x[26] = x[27] = x[28] = x[29] = x[30] = x[31] = x[32] = x[33] = 0.0; 594bef36659SShri Abhyankar x[34] = x[35] = x[36] = x[37] = x[38] = x[39] = x[40] = x[41] = 0.0; 595bef36659SShri Abhyankar x[42] = x[43] = x[44] = x[45] = x[46] = x[47] = x[48] = 0.0; 596bef36659SShri Abhyankar } 597bef36659SShri Abhyankar /* load in initial (unfactored row) */ 598bef36659SShri Abhyankar nz = ai[i + 1] - ai[i]; 599bef36659SShri Abhyankar ajtmpold = aj + ai[i]; 600bef36659SShri Abhyankar v = aa + 49 * ai[i]; 601bef36659SShri Abhyankar for (j = 0; j < nz; j++) { 602bef36659SShri Abhyankar x = rtmp + 49 * ajtmpold[j]; 6039371c9d4SSatish Balay x[0] = v[0]; 6049371c9d4SSatish Balay x[1] = v[1]; 6059371c9d4SSatish Balay x[2] = v[2]; 6069371c9d4SSatish Balay x[3] = v[3]; 6079371c9d4SSatish Balay x[4] = v[4]; 6089371c9d4SSatish Balay x[5] = v[5]; 6099371c9d4SSatish Balay x[6] = v[6]; 6109371c9d4SSatish Balay x[7] = v[7]; 6119371c9d4SSatish Balay x[8] = v[8]; 6129371c9d4SSatish Balay x[9] = v[9]; 6139371c9d4SSatish Balay x[10] = v[10]; 6149371c9d4SSatish Balay x[11] = v[11]; 6159371c9d4SSatish Balay x[12] = v[12]; 6169371c9d4SSatish Balay x[13] = v[13]; 6179371c9d4SSatish Balay x[14] = v[14]; 6189371c9d4SSatish Balay x[15] = v[15]; 6199371c9d4SSatish Balay x[16] = v[16]; 6209371c9d4SSatish Balay x[17] = v[17]; 6219371c9d4SSatish Balay x[18] = v[18]; 6229371c9d4SSatish Balay x[19] = v[19]; 6239371c9d4SSatish Balay x[20] = v[20]; 6249371c9d4SSatish Balay x[21] = v[21]; 6259371c9d4SSatish Balay x[22] = v[22]; 6269371c9d4SSatish Balay x[23] = v[23]; 6279371c9d4SSatish Balay x[24] = v[24]; 6289371c9d4SSatish Balay x[25] = v[25]; 6299371c9d4SSatish Balay x[26] = v[26]; 6309371c9d4SSatish Balay x[27] = v[27]; 6319371c9d4SSatish Balay x[28] = v[28]; 6329371c9d4SSatish Balay x[29] = v[29]; 6339371c9d4SSatish Balay x[30] = v[30]; 6349371c9d4SSatish Balay x[31] = v[31]; 6359371c9d4SSatish Balay x[32] = v[32]; 6369371c9d4SSatish Balay x[33] = v[33]; 6379371c9d4SSatish Balay x[34] = v[34]; 6389371c9d4SSatish Balay x[35] = v[35]; 6399371c9d4SSatish Balay x[36] = v[36]; 6409371c9d4SSatish Balay x[37] = v[37]; 6419371c9d4SSatish Balay x[38] = v[38]; 6429371c9d4SSatish Balay x[39] = v[39]; 6439371c9d4SSatish Balay x[40] = v[40]; 6449371c9d4SSatish Balay x[41] = v[41]; 6459371c9d4SSatish Balay x[42] = v[42]; 6469371c9d4SSatish Balay x[43] = v[43]; 6479371c9d4SSatish Balay x[44] = v[44]; 6489371c9d4SSatish Balay x[45] = v[45]; 6499371c9d4SSatish Balay x[46] = v[46]; 6509371c9d4SSatish Balay x[47] = v[47]; 651bef36659SShri Abhyankar x[48] = v[48]; 652bef36659SShri Abhyankar v += 49; 653bef36659SShri Abhyankar } 654bef36659SShri Abhyankar row = *ajtmp++; 655bef36659SShri Abhyankar while (row < i) { 656bef36659SShri Abhyankar pc = rtmp + 49 * row; 6579371c9d4SSatish Balay p1 = pc[0]; 6589371c9d4SSatish Balay p2 = pc[1]; 6599371c9d4SSatish Balay p3 = pc[2]; 6609371c9d4SSatish Balay p4 = pc[3]; 6619371c9d4SSatish Balay p5 = pc[4]; 6629371c9d4SSatish Balay p6 = pc[5]; 6639371c9d4SSatish Balay p7 = pc[6]; 6649371c9d4SSatish Balay p8 = pc[7]; 6659371c9d4SSatish Balay p9 = pc[8]; 6669371c9d4SSatish Balay p10 = pc[9]; 6679371c9d4SSatish Balay p11 = pc[10]; 6689371c9d4SSatish Balay p12 = pc[11]; 6699371c9d4SSatish Balay p13 = pc[12]; 6709371c9d4SSatish Balay p14 = pc[13]; 6719371c9d4SSatish Balay p15 = pc[14]; 6729371c9d4SSatish Balay p16 = pc[15]; 6739371c9d4SSatish Balay p17 = pc[16]; 6749371c9d4SSatish Balay p18 = pc[17]; 6759371c9d4SSatish Balay p19 = pc[18]; 6769371c9d4SSatish Balay p20 = pc[19]; 6779371c9d4SSatish Balay p21 = pc[20]; 6789371c9d4SSatish Balay p22 = pc[21]; 6799371c9d4SSatish Balay p23 = pc[22]; 6809371c9d4SSatish Balay p24 = pc[23]; 6819371c9d4SSatish Balay p25 = pc[24]; 6829371c9d4SSatish Balay p26 = pc[25]; 6839371c9d4SSatish Balay p27 = pc[26]; 6849371c9d4SSatish Balay p28 = pc[27]; 6859371c9d4SSatish Balay p29 = pc[28]; 6869371c9d4SSatish Balay p30 = pc[29]; 6879371c9d4SSatish Balay p31 = pc[30]; 6889371c9d4SSatish Balay p32 = pc[31]; 6899371c9d4SSatish Balay p33 = pc[32]; 6909371c9d4SSatish Balay p34 = pc[33]; 6919371c9d4SSatish Balay p35 = pc[34]; 6929371c9d4SSatish Balay p36 = pc[35]; 6939371c9d4SSatish Balay p37 = pc[36]; 6949371c9d4SSatish Balay p38 = pc[37]; 6959371c9d4SSatish Balay p39 = pc[38]; 6969371c9d4SSatish Balay p40 = pc[39]; 6979371c9d4SSatish Balay p41 = pc[40]; 6989371c9d4SSatish Balay p42 = pc[41]; 6999371c9d4SSatish Balay p43 = pc[42]; 7009371c9d4SSatish Balay p44 = pc[43]; 7019371c9d4SSatish Balay p45 = pc[44]; 7029371c9d4SSatish Balay p46 = pc[45]; 7039371c9d4SSatish Balay p47 = pc[46]; 7049371c9d4SSatish Balay p48 = pc[47]; 705bef36659SShri Abhyankar p49 = pc[48]; 7069371c9d4SSatish Balay if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0 || p5 != 0.0 || p6 != 0.0 || p7 != 0.0 || p8 != 0.0 || p9 != 0.0 || p10 != 0.0 || p11 != 0.0 || p12 != 0.0 || p13 != 0.0 || p14 != 0.0 || p15 != 0.0 || p16 != 0.0 || p17 != 0.0 || p18 != 0.0 || p19 != 0.0 || p20 != 0.0 || p21 != 0.0 || p22 != 0.0 || p23 != 0.0 || p24 != 0.0 || p25 != 0.0 || p26 != 0.0 || p27 != 0.0 || p28 != 0.0 || p29 != 0.0 || p30 != 0.0 || p31 != 0.0 || p32 != 0.0 || p33 != 0.0 || p34 != 0.0 || p35 != 0.0 || p36 != 0.0 || p37 != 0.0 || p38 != 0.0 || p39 != 0.0 || p40 != 0.0 || p41 != 0.0 || p42 != 0.0 || p43 != 0.0 || p44 != 0.0 || p45 != 0.0 || p46 != 0.0 || p47 != 0.0 || p48 != 0.0 || p49 != 0.0) { 707bef36659SShri Abhyankar pv = ba + 49 * diag_offset[row]; 708bef36659SShri Abhyankar pj = bj + diag_offset[row] + 1; 7099371c9d4SSatish Balay x1 = pv[0]; 7109371c9d4SSatish Balay x2 = pv[1]; 7119371c9d4SSatish Balay x3 = pv[2]; 7129371c9d4SSatish Balay x4 = pv[3]; 7139371c9d4SSatish Balay x5 = pv[4]; 7149371c9d4SSatish Balay x6 = pv[5]; 7159371c9d4SSatish Balay x7 = pv[6]; 7169371c9d4SSatish Balay x8 = pv[7]; 7179371c9d4SSatish Balay x9 = pv[8]; 7189371c9d4SSatish Balay x10 = pv[9]; 7199371c9d4SSatish Balay x11 = pv[10]; 7209371c9d4SSatish Balay x12 = pv[11]; 7219371c9d4SSatish Balay x13 = pv[12]; 7229371c9d4SSatish Balay x14 = pv[13]; 7239371c9d4SSatish Balay x15 = pv[14]; 7249371c9d4SSatish Balay x16 = pv[15]; 7259371c9d4SSatish Balay x17 = pv[16]; 7269371c9d4SSatish Balay x18 = pv[17]; 7279371c9d4SSatish Balay x19 = pv[18]; 7289371c9d4SSatish Balay x20 = pv[19]; 7299371c9d4SSatish Balay x21 = pv[20]; 7309371c9d4SSatish Balay x22 = pv[21]; 7319371c9d4SSatish Balay x23 = pv[22]; 7329371c9d4SSatish Balay x24 = pv[23]; 7339371c9d4SSatish Balay x25 = pv[24]; 7349371c9d4SSatish Balay x26 = pv[25]; 7359371c9d4SSatish Balay x27 = pv[26]; 7369371c9d4SSatish Balay x28 = pv[27]; 7379371c9d4SSatish Balay x29 = pv[28]; 7389371c9d4SSatish Balay x30 = pv[29]; 7399371c9d4SSatish Balay x31 = pv[30]; 7409371c9d4SSatish Balay x32 = pv[31]; 7419371c9d4SSatish Balay x33 = pv[32]; 7429371c9d4SSatish Balay x34 = pv[33]; 7439371c9d4SSatish Balay x35 = pv[34]; 7449371c9d4SSatish Balay x36 = pv[35]; 7459371c9d4SSatish Balay x37 = pv[36]; 7469371c9d4SSatish Balay x38 = pv[37]; 7479371c9d4SSatish Balay x39 = pv[38]; 7489371c9d4SSatish Balay x40 = pv[39]; 7499371c9d4SSatish Balay x41 = pv[40]; 7509371c9d4SSatish Balay x42 = pv[41]; 7519371c9d4SSatish Balay x43 = pv[42]; 7529371c9d4SSatish Balay x44 = pv[43]; 7539371c9d4SSatish Balay x45 = pv[44]; 7549371c9d4SSatish Balay x46 = pv[45]; 7559371c9d4SSatish Balay x47 = pv[46]; 7569371c9d4SSatish Balay x48 = pv[47]; 757bef36659SShri Abhyankar x49 = pv[48]; 758bef36659SShri Abhyankar pc[0] = m1 = p1 * x1 + p8 * x2 + p15 * x3 + p22 * x4 + p29 * x5 + p36 * x6 + p43 * x7; 759bef36659SShri Abhyankar pc[1] = m2 = p2 * x1 + p9 * x2 + p16 * x3 + p23 * x4 + p30 * x5 + p37 * x6 + p44 * x7; 760bef36659SShri Abhyankar pc[2] = m3 = p3 * x1 + p10 * x2 + p17 * x3 + p24 * x4 + p31 * x5 + p38 * x6 + p45 * x7; 761bef36659SShri Abhyankar pc[3] = m4 = p4 * x1 + p11 * x2 + p18 * x3 + p25 * x4 + p32 * x5 + p39 * x6 + p46 * x7; 762bef36659SShri Abhyankar pc[4] = m5 = p5 * x1 + p12 * x2 + p19 * x3 + p26 * x4 + p33 * x5 + p40 * x6 + p47 * x7; 763bef36659SShri Abhyankar pc[5] = m6 = p6 * x1 + p13 * x2 + p20 * x3 + p27 * x4 + p34 * x5 + p41 * x6 + p48 * x7; 764bef36659SShri Abhyankar pc[6] = m7 = p7 * x1 + p14 * x2 + p21 * x3 + p28 * x4 + p35 * x5 + p42 * x6 + p49 * x7; 765bef36659SShri Abhyankar 766bef36659SShri Abhyankar pc[7] = m8 = p1 * x8 + p8 * x9 + p15 * x10 + p22 * x11 + p29 * x12 + p36 * x13 + p43 * x14; 767bef36659SShri Abhyankar pc[8] = m9 = p2 * x8 + p9 * x9 + p16 * x10 + p23 * x11 + p30 * x12 + p37 * x13 + p44 * x14; 768bef36659SShri Abhyankar pc[9] = m10 = p3 * x8 + p10 * x9 + p17 * x10 + p24 * x11 + p31 * x12 + p38 * x13 + p45 * x14; 769bef36659SShri Abhyankar pc[10] = m11 = p4 * x8 + p11 * x9 + p18 * x10 + p25 * x11 + p32 * x12 + p39 * x13 + p46 * x14; 770bef36659SShri Abhyankar pc[11] = m12 = p5 * x8 + p12 * x9 + p19 * x10 + p26 * x11 + p33 * x12 + p40 * x13 + p47 * x14; 771bef36659SShri Abhyankar pc[12] = m13 = p6 * x8 + p13 * x9 + p20 * x10 + p27 * x11 + p34 * x12 + p41 * x13 + p48 * x14; 772bef36659SShri Abhyankar pc[13] = m14 = p7 * x8 + p14 * x9 + p21 * x10 + p28 * x11 + p35 * x12 + p42 * x13 + p49 * x14; 773bef36659SShri Abhyankar 774bef36659SShri Abhyankar pc[14] = m15 = p1 * x15 + p8 * x16 + p15 * x17 + p22 * x18 + p29 * x19 + p36 * x20 + p43 * x21; 775bef36659SShri Abhyankar pc[15] = m16 = p2 * x15 + p9 * x16 + p16 * x17 + p23 * x18 + p30 * x19 + p37 * x20 + p44 * x21; 776bef36659SShri Abhyankar pc[16] = m17 = p3 * x15 + p10 * x16 + p17 * x17 + p24 * x18 + p31 * x19 + p38 * x20 + p45 * x21; 777bef36659SShri Abhyankar pc[17] = m18 = p4 * x15 + p11 * x16 + p18 * x17 + p25 * x18 + p32 * x19 + p39 * x20 + p46 * x21; 778bef36659SShri Abhyankar pc[18] = m19 = p5 * x15 + p12 * x16 + p19 * x17 + p26 * x18 + p33 * x19 + p40 * x20 + p47 * x21; 779bef36659SShri Abhyankar pc[19] = m20 = p6 * x15 + p13 * x16 + p20 * x17 + p27 * x18 + p34 * x19 + p41 * x20 + p48 * x21; 780bef36659SShri Abhyankar pc[20] = m21 = p7 * x15 + p14 * x16 + p21 * x17 + p28 * x18 + p35 * x19 + p42 * x20 + p49 * x21; 781bef36659SShri Abhyankar 782bef36659SShri Abhyankar pc[21] = m22 = p1 * x22 + p8 * x23 + p15 * x24 + p22 * x25 + p29 * x26 + p36 * x27 + p43 * x28; 783bef36659SShri Abhyankar pc[22] = m23 = p2 * x22 + p9 * x23 + p16 * x24 + p23 * x25 + p30 * x26 + p37 * x27 + p44 * x28; 784bef36659SShri Abhyankar pc[23] = m24 = p3 * x22 + p10 * x23 + p17 * x24 + p24 * x25 + p31 * x26 + p38 * x27 + p45 * x28; 785bef36659SShri Abhyankar pc[24] = m25 = p4 * x22 + p11 * x23 + p18 * x24 + p25 * x25 + p32 * x26 + p39 * x27 + p46 * x28; 786bef36659SShri Abhyankar pc[25] = m26 = p5 * x22 + p12 * x23 + p19 * x24 + p26 * x25 + p33 * x26 + p40 * x27 + p47 * x28; 787bef36659SShri Abhyankar pc[26] = m27 = p6 * x22 + p13 * x23 + p20 * x24 + p27 * x25 + p34 * x26 + p41 * x27 + p48 * x28; 788bef36659SShri Abhyankar pc[27] = m28 = p7 * x22 + p14 * x23 + p21 * x24 + p28 * x25 + p35 * x26 + p42 * x27 + p49 * x28; 789bef36659SShri Abhyankar 790bef36659SShri Abhyankar pc[28] = m29 = p1 * x29 + p8 * x30 + p15 * x31 + p22 * x32 + p29 * x33 + p36 * x34 + p43 * x35; 791bef36659SShri Abhyankar pc[29] = m30 = p2 * x29 + p9 * x30 + p16 * x31 + p23 * x32 + p30 * x33 + p37 * x34 + p44 * x35; 792bef36659SShri Abhyankar pc[30] = m31 = p3 * x29 + p10 * x30 + p17 * x31 + p24 * x32 + p31 * x33 + p38 * x34 + p45 * x35; 793bef36659SShri Abhyankar pc[31] = m32 = p4 * x29 + p11 * x30 + p18 * x31 + p25 * x32 + p32 * x33 + p39 * x34 + p46 * x35; 794bef36659SShri Abhyankar pc[32] = m33 = p5 * x29 + p12 * x30 + p19 * x31 + p26 * x32 + p33 * x33 + p40 * x34 + p47 * x35; 795bef36659SShri Abhyankar pc[33] = m34 = p6 * x29 + p13 * x30 + p20 * x31 + p27 * x32 + p34 * x33 + p41 * x34 + p48 * x35; 796bef36659SShri Abhyankar pc[34] = m35 = p7 * x29 + p14 * x30 + p21 * x31 + p28 * x32 + p35 * x33 + p42 * x34 + p49 * x35; 797bef36659SShri Abhyankar 798bef36659SShri Abhyankar pc[35] = m36 = p1 * x36 + p8 * x37 + p15 * x38 + p22 * x39 + p29 * x40 + p36 * x41 + p43 * x42; 799bef36659SShri Abhyankar pc[36] = m37 = p2 * x36 + p9 * x37 + p16 * x38 + p23 * x39 + p30 * x40 + p37 * x41 + p44 * x42; 800bef36659SShri Abhyankar pc[37] = m38 = p3 * x36 + p10 * x37 + p17 * x38 + p24 * x39 + p31 * x40 + p38 * x41 + p45 * x42; 801bef36659SShri Abhyankar pc[38] = m39 = p4 * x36 + p11 * x37 + p18 * x38 + p25 * x39 + p32 * x40 + p39 * x41 + p46 * x42; 802bef36659SShri Abhyankar pc[39] = m40 = p5 * x36 + p12 * x37 + p19 * x38 + p26 * x39 + p33 * x40 + p40 * x41 + p47 * x42; 803bef36659SShri Abhyankar pc[40] = m41 = p6 * x36 + p13 * x37 + p20 * x38 + p27 * x39 + p34 * x40 + p41 * x41 + p48 * x42; 804bef36659SShri Abhyankar pc[41] = m42 = p7 * x36 + p14 * x37 + p21 * x38 + p28 * x39 + p35 * x40 + p42 * x41 + p49 * x42; 805bef36659SShri Abhyankar 806bef36659SShri Abhyankar pc[42] = m43 = p1 * x43 + p8 * x44 + p15 * x45 + p22 * x46 + p29 * x47 + p36 * x48 + p43 * x49; 807bef36659SShri Abhyankar pc[43] = m44 = p2 * x43 + p9 * x44 + p16 * x45 + p23 * x46 + p30 * x47 + p37 * x48 + p44 * x49; 808bef36659SShri Abhyankar pc[44] = m45 = p3 * x43 + p10 * x44 + p17 * x45 + p24 * x46 + p31 * x47 + p38 * x48 + p45 * x49; 809bef36659SShri Abhyankar pc[45] = m46 = p4 * x43 + p11 * x44 + p18 * x45 + p25 * x46 + p32 * x47 + p39 * x48 + p46 * x49; 810bef36659SShri Abhyankar pc[46] = m47 = p5 * x43 + p12 * x44 + p19 * x45 + p26 * x46 + p33 * x47 + p40 * x48 + p47 * x49; 811bef36659SShri Abhyankar pc[47] = m48 = p6 * x43 + p13 * x44 + p20 * x45 + p27 * x46 + p34 * x47 + p41 * x48 + p48 * x49; 812bef36659SShri Abhyankar pc[48] = m49 = p7 * x43 + p14 * x44 + p21 * x45 + p28 * x46 + p35 * x47 + p42 * x48 + p49 * x49; 813bef36659SShri Abhyankar 814bef36659SShri Abhyankar nz = bi[row + 1] - diag_offset[row] - 1; 815bef36659SShri Abhyankar pv += 49; 816bef36659SShri Abhyankar for (j = 0; j < nz; j++) { 8179371c9d4SSatish Balay x1 = pv[0]; 8189371c9d4SSatish Balay x2 = pv[1]; 8199371c9d4SSatish Balay x3 = pv[2]; 8209371c9d4SSatish Balay x4 = pv[3]; 8219371c9d4SSatish Balay x5 = pv[4]; 8229371c9d4SSatish Balay x6 = pv[5]; 8239371c9d4SSatish Balay x7 = pv[6]; 8249371c9d4SSatish Balay x8 = pv[7]; 8259371c9d4SSatish Balay x9 = pv[8]; 8269371c9d4SSatish Balay x10 = pv[9]; 8279371c9d4SSatish Balay x11 = pv[10]; 8289371c9d4SSatish Balay x12 = pv[11]; 8299371c9d4SSatish Balay x13 = pv[12]; 8309371c9d4SSatish Balay x14 = pv[13]; 8319371c9d4SSatish Balay x15 = pv[14]; 8329371c9d4SSatish Balay x16 = pv[15]; 8339371c9d4SSatish Balay x17 = pv[16]; 8349371c9d4SSatish Balay x18 = pv[17]; 8359371c9d4SSatish Balay x19 = pv[18]; 8369371c9d4SSatish Balay x20 = pv[19]; 8379371c9d4SSatish Balay x21 = pv[20]; 8389371c9d4SSatish Balay x22 = pv[21]; 8399371c9d4SSatish Balay x23 = pv[22]; 8409371c9d4SSatish Balay x24 = pv[23]; 8419371c9d4SSatish Balay x25 = pv[24]; 8429371c9d4SSatish Balay x26 = pv[25]; 8439371c9d4SSatish Balay x27 = pv[26]; 8449371c9d4SSatish Balay x28 = pv[27]; 8459371c9d4SSatish Balay x29 = pv[28]; 8469371c9d4SSatish Balay x30 = pv[29]; 8479371c9d4SSatish Balay x31 = pv[30]; 8489371c9d4SSatish Balay x32 = pv[31]; 8499371c9d4SSatish Balay x33 = pv[32]; 8509371c9d4SSatish Balay x34 = pv[33]; 8519371c9d4SSatish Balay x35 = pv[34]; 8529371c9d4SSatish Balay x36 = pv[35]; 8539371c9d4SSatish Balay x37 = pv[36]; 8549371c9d4SSatish Balay x38 = pv[37]; 8559371c9d4SSatish Balay x39 = pv[38]; 8569371c9d4SSatish Balay x40 = pv[39]; 8579371c9d4SSatish Balay x41 = pv[40]; 8589371c9d4SSatish Balay x42 = pv[41]; 8599371c9d4SSatish Balay x43 = pv[42]; 8609371c9d4SSatish Balay x44 = pv[43]; 8619371c9d4SSatish Balay x45 = pv[44]; 8629371c9d4SSatish Balay x46 = pv[45]; 8639371c9d4SSatish Balay x47 = pv[46]; 8649371c9d4SSatish Balay x48 = pv[47]; 865bef36659SShri Abhyankar x49 = pv[48]; 866bef36659SShri Abhyankar x = rtmp + 49 * pj[j]; 867bef36659SShri Abhyankar x[0] -= m1 * x1 + m8 * x2 + m15 * x3 + m22 * x4 + m29 * x5 + m36 * x6 + m43 * x7; 868bef36659SShri Abhyankar x[1] -= m2 * x1 + m9 * x2 + m16 * x3 + m23 * x4 + m30 * x5 + m37 * x6 + m44 * x7; 869bef36659SShri Abhyankar x[2] -= m3 * x1 + m10 * x2 + m17 * x3 + m24 * x4 + m31 * x5 + m38 * x6 + m45 * x7; 870bef36659SShri Abhyankar x[3] -= m4 * x1 + m11 * x2 + m18 * x3 + m25 * x4 + m32 * x5 + m39 * x6 + m46 * x7; 871bef36659SShri Abhyankar x[4] -= m5 * x1 + m12 * x2 + m19 * x3 + m26 * x4 + m33 * x5 + m40 * x6 + m47 * x7; 872bef36659SShri Abhyankar x[5] -= m6 * x1 + m13 * x2 + m20 * x3 + m27 * x4 + m34 * x5 + m41 * x6 + m48 * x7; 873bef36659SShri Abhyankar x[6] -= m7 * x1 + m14 * x2 + m21 * x3 + m28 * x4 + m35 * x5 + m42 * x6 + m49 * x7; 874bef36659SShri Abhyankar 875bef36659SShri Abhyankar x[7] -= m1 * x8 + m8 * x9 + m15 * x10 + m22 * x11 + m29 * x12 + m36 * x13 + m43 * x14; 876bef36659SShri Abhyankar x[8] -= m2 * x8 + m9 * x9 + m16 * x10 + m23 * x11 + m30 * x12 + m37 * x13 + m44 * x14; 877bef36659SShri Abhyankar x[9] -= m3 * x8 + m10 * x9 + m17 * x10 + m24 * x11 + m31 * x12 + m38 * x13 + m45 * x14; 878bef36659SShri Abhyankar x[10] -= m4 * x8 + m11 * x9 + m18 * x10 + m25 * x11 + m32 * x12 + m39 * x13 + m46 * x14; 879bef36659SShri Abhyankar x[11] -= m5 * x8 + m12 * x9 + m19 * x10 + m26 * x11 + m33 * x12 + m40 * x13 + m47 * x14; 880bef36659SShri Abhyankar x[12] -= m6 * x8 + m13 * x9 + m20 * x10 + m27 * x11 + m34 * x12 + m41 * x13 + m48 * x14; 881bef36659SShri Abhyankar x[13] -= m7 * x8 + m14 * x9 + m21 * x10 + m28 * x11 + m35 * x12 + m42 * x13 + m49 * x14; 882bef36659SShri Abhyankar 883bef36659SShri Abhyankar x[14] -= m1 * x15 + m8 * x16 + m15 * x17 + m22 * x18 + m29 * x19 + m36 * x20 + m43 * x21; 884bef36659SShri Abhyankar x[15] -= m2 * x15 + m9 * x16 + m16 * x17 + m23 * x18 + m30 * x19 + m37 * x20 + m44 * x21; 885bef36659SShri Abhyankar x[16] -= m3 * x15 + m10 * x16 + m17 * x17 + m24 * x18 + m31 * x19 + m38 * x20 + m45 * x21; 886bef36659SShri Abhyankar x[17] -= m4 * x15 + m11 * x16 + m18 * x17 + m25 * x18 + m32 * x19 + m39 * x20 + m46 * x21; 887bef36659SShri Abhyankar x[18] -= m5 * x15 + m12 * x16 + m19 * x17 + m26 * x18 + m33 * x19 + m40 * x20 + m47 * x21; 888bef36659SShri Abhyankar x[19] -= m6 * x15 + m13 * x16 + m20 * x17 + m27 * x18 + m34 * x19 + m41 * x20 + m48 * x21; 889bef36659SShri Abhyankar x[20] -= m7 * x15 + m14 * x16 + m21 * x17 + m28 * x18 + m35 * x19 + m42 * x20 + m49 * x21; 890bef36659SShri Abhyankar 891bef36659SShri Abhyankar x[21] -= m1 * x22 + m8 * x23 + m15 * x24 + m22 * x25 + m29 * x26 + m36 * x27 + m43 * x28; 892bef36659SShri Abhyankar x[22] -= m2 * x22 + m9 * x23 + m16 * x24 + m23 * x25 + m30 * x26 + m37 * x27 + m44 * x28; 893bef36659SShri Abhyankar x[23] -= m3 * x22 + m10 * x23 + m17 * x24 + m24 * x25 + m31 * x26 + m38 * x27 + m45 * x28; 894bef36659SShri Abhyankar x[24] -= m4 * x22 + m11 * x23 + m18 * x24 + m25 * x25 + m32 * x26 + m39 * x27 + m46 * x28; 895bef36659SShri Abhyankar x[25] -= m5 * x22 + m12 * x23 + m19 * x24 + m26 * x25 + m33 * x26 + m40 * x27 + m47 * x28; 896bef36659SShri Abhyankar x[26] -= m6 * x22 + m13 * x23 + m20 * x24 + m27 * x25 + m34 * x26 + m41 * x27 + m48 * x28; 897bef36659SShri Abhyankar x[27] -= m7 * x22 + m14 * x23 + m21 * x24 + m28 * x25 + m35 * x26 + m42 * x27 + m49 * x28; 898bef36659SShri Abhyankar 899bef36659SShri Abhyankar x[28] -= m1 * x29 + m8 * x30 + m15 * x31 + m22 * x32 + m29 * x33 + m36 * x34 + m43 * x35; 900bef36659SShri Abhyankar x[29] -= m2 * x29 + m9 * x30 + m16 * x31 + m23 * x32 + m30 * x33 + m37 * x34 + m44 * x35; 901bef36659SShri Abhyankar x[30] -= m3 * x29 + m10 * x30 + m17 * x31 + m24 * x32 + m31 * x33 + m38 * x34 + m45 * x35; 902bef36659SShri Abhyankar x[31] -= m4 * x29 + m11 * x30 + m18 * x31 + m25 * x32 + m32 * x33 + m39 * x34 + m46 * x35; 903bef36659SShri Abhyankar x[32] -= m5 * x29 + m12 * x30 + m19 * x31 + m26 * x32 + m33 * x33 + m40 * x34 + m47 * x35; 904bef36659SShri Abhyankar x[33] -= m6 * x29 + m13 * x30 + m20 * x31 + m27 * x32 + m34 * x33 + m41 * x34 + m48 * x35; 905bef36659SShri Abhyankar x[34] -= m7 * x29 + m14 * x30 + m21 * x31 + m28 * x32 + m35 * x33 + m42 * x34 + m49 * x35; 906bef36659SShri Abhyankar 907bef36659SShri Abhyankar x[35] -= m1 * x36 + m8 * x37 + m15 * x38 + m22 * x39 + m29 * x40 + m36 * x41 + m43 * x42; 908bef36659SShri Abhyankar x[36] -= m2 * x36 + m9 * x37 + m16 * x38 + m23 * x39 + m30 * x40 + m37 * x41 + m44 * x42; 909bef36659SShri Abhyankar x[37] -= m3 * x36 + m10 * x37 + m17 * x38 + m24 * x39 + m31 * x40 + m38 * x41 + m45 * x42; 910bef36659SShri Abhyankar x[38] -= m4 * x36 + m11 * x37 + m18 * x38 + m25 * x39 + m32 * x40 + m39 * x41 + m46 * x42; 911bef36659SShri Abhyankar x[39] -= m5 * x36 + m12 * x37 + m19 * x38 + m26 * x39 + m33 * x40 + m40 * x41 + m47 * x42; 912bef36659SShri Abhyankar x[40] -= m6 * x36 + m13 * x37 + m20 * x38 + m27 * x39 + m34 * x40 + m41 * x41 + m48 * x42; 913bef36659SShri Abhyankar x[41] -= m7 * x36 + m14 * x37 + m21 * x38 + m28 * x39 + m35 * x40 + m42 * x41 + m49 * x42; 914bef36659SShri Abhyankar 915bef36659SShri Abhyankar x[42] -= m1 * x43 + m8 * x44 + m15 * x45 + m22 * x46 + m29 * x47 + m36 * x48 + m43 * x49; 916bef36659SShri Abhyankar x[43] -= m2 * x43 + m9 * x44 + m16 * x45 + m23 * x46 + m30 * x47 + m37 * x48 + m44 * x49; 917bef36659SShri Abhyankar x[44] -= m3 * x43 + m10 * x44 + m17 * x45 + m24 * x46 + m31 * x47 + m38 * x48 + m45 * x49; 918bef36659SShri Abhyankar x[45] -= m4 * x43 + m11 * x44 + m18 * x45 + m25 * x46 + m32 * x47 + m39 * x48 + m46 * x49; 919bef36659SShri Abhyankar x[46] -= m5 * x43 + m12 * x44 + m19 * x45 + m26 * x46 + m33 * x47 + m40 * x48 + m47 * x49; 920bef36659SShri Abhyankar x[47] -= m6 * x43 + m13 * x44 + m20 * x45 + m27 * x46 + m34 * x47 + m41 * x48 + m48 * x49; 921bef36659SShri Abhyankar x[48] -= m7 * x43 + m14 * x44 + m21 * x45 + m28 * x46 + m35 * x47 + m42 * x48 + m49 * x49; 922bef36659SShri Abhyankar pv += 49; 923bef36659SShri Abhyankar } 9249566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(686.0 * nz + 637.0)); 925bef36659SShri Abhyankar } 926bef36659SShri Abhyankar row = *ajtmp++; 927bef36659SShri Abhyankar } 928bef36659SShri Abhyankar /* finished row so stick it into b->a */ 929bef36659SShri Abhyankar pv = ba + 49 * bi[i]; 930bef36659SShri Abhyankar pj = bj + bi[i]; 931bef36659SShri Abhyankar nz = bi[i + 1] - bi[i]; 932bef36659SShri Abhyankar for (j = 0; j < nz; j++) { 933bef36659SShri Abhyankar x = rtmp + 49 * pj[j]; 9349371c9d4SSatish Balay pv[0] = x[0]; 9359371c9d4SSatish Balay pv[1] = x[1]; 9369371c9d4SSatish Balay pv[2] = x[2]; 9379371c9d4SSatish Balay pv[3] = x[3]; 9389371c9d4SSatish Balay pv[4] = x[4]; 9399371c9d4SSatish Balay pv[5] = x[5]; 9409371c9d4SSatish Balay pv[6] = x[6]; 9419371c9d4SSatish Balay pv[7] = x[7]; 9429371c9d4SSatish Balay pv[8] = x[8]; 9439371c9d4SSatish Balay pv[9] = x[9]; 9449371c9d4SSatish Balay pv[10] = x[10]; 9459371c9d4SSatish Balay pv[11] = x[11]; 9469371c9d4SSatish Balay pv[12] = x[12]; 9479371c9d4SSatish Balay pv[13] = x[13]; 9489371c9d4SSatish Balay pv[14] = x[14]; 9499371c9d4SSatish Balay pv[15] = x[15]; 9509371c9d4SSatish Balay pv[16] = x[16]; 9519371c9d4SSatish Balay pv[17] = x[17]; 9529371c9d4SSatish Balay pv[18] = x[18]; 9539371c9d4SSatish Balay pv[19] = x[19]; 9549371c9d4SSatish Balay pv[20] = x[20]; 9559371c9d4SSatish Balay pv[21] = x[21]; 9569371c9d4SSatish Balay pv[22] = x[22]; 9579371c9d4SSatish Balay pv[23] = x[23]; 9589371c9d4SSatish Balay pv[24] = x[24]; 9599371c9d4SSatish Balay pv[25] = x[25]; 9609371c9d4SSatish Balay pv[26] = x[26]; 9619371c9d4SSatish Balay pv[27] = x[27]; 9629371c9d4SSatish Balay pv[28] = x[28]; 9639371c9d4SSatish Balay pv[29] = x[29]; 9649371c9d4SSatish Balay pv[30] = x[30]; 9659371c9d4SSatish Balay pv[31] = x[31]; 9669371c9d4SSatish Balay pv[32] = x[32]; 9679371c9d4SSatish Balay pv[33] = x[33]; 9689371c9d4SSatish Balay pv[34] = x[34]; 9699371c9d4SSatish Balay pv[35] = x[35]; 9709371c9d4SSatish Balay pv[36] = x[36]; 9719371c9d4SSatish Balay pv[37] = x[37]; 9729371c9d4SSatish Balay pv[38] = x[38]; 9739371c9d4SSatish Balay pv[39] = x[39]; 9749371c9d4SSatish Balay pv[40] = x[40]; 9759371c9d4SSatish Balay pv[41] = x[41]; 9769371c9d4SSatish Balay pv[42] = x[42]; 9779371c9d4SSatish Balay pv[43] = x[43]; 9789371c9d4SSatish Balay pv[44] = x[44]; 9799371c9d4SSatish Balay pv[45] = x[45]; 9809371c9d4SSatish Balay pv[46] = x[46]; 9819371c9d4SSatish Balay pv[47] = x[47]; 982bef36659SShri Abhyankar pv[48] = x[48]; 983bef36659SShri Abhyankar pv += 49; 984bef36659SShri Abhyankar } 985bef36659SShri Abhyankar /* invert diagonal block */ 986bef36659SShri Abhyankar w = ba + 49 * diag_offset[i]; 9879566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_7(w, shift, allowzeropivot, &zeropivotdetected)); 9887b6c816cSBarry Smith if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 989bef36659SShri Abhyankar } 990bef36659SShri Abhyankar 9919566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 99226fbe8dcSKarl Rupp 99306e38f1dSHong Zhang C->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering_inplace; 99406e38f1dSHong Zhang C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7_NaturalOrdering_inplace; 995bef36659SShri Abhyankar C->assembled = PETSC_TRUE; 99626fbe8dcSKarl Rupp 9979566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.333333333333 * 7 * 7 * 7 * b->mbs)); /* from inverting diagonal blocks */ 998bef36659SShri Abhyankar PetscFunctionReturn(0); 999bef36659SShri Abhyankar } 1000bef36659SShri Abhyankar 10019371c9d4SSatish Balay PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info) { 1002bef36659SShri Abhyankar Mat C = B; 1003bef36659SShri Abhyankar Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data; 1004bbd65245SShri Abhyankar PetscInt i, j, k, nz, nzL, row; 1005bbd65245SShri Abhyankar const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 1006bbd65245SShri Abhyankar const PetscInt *ajtmp, *bjtmp, *bdiag = b->diag, *pj, bs2 = a->bs2; 1007bef36659SShri Abhyankar MatScalar *rtmp, *pc, *mwork, *v, *pv, *aa = a->a; 1008bbd65245SShri Abhyankar PetscInt flg; 1009182b8fbaSHong Zhang PetscReal shift = info->shiftamount; 1010a455e926SHong Zhang PetscBool allowzeropivot, zeropivotdetected; 1011bef36659SShri Abhyankar 1012bef36659SShri Abhyankar PetscFunctionBegin; 10130164db54SHong Zhang allowzeropivot = PetscNot(A->erroriffailure); 10140164db54SHong Zhang 1015bef36659SShri Abhyankar /* generate work space needed by the factorization */ 10169566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs2 * n, &rtmp, bs2, &mwork)); 10179566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(rtmp, bs2 * n)); 1018bef36659SShri Abhyankar 1019bef36659SShri Abhyankar for (i = 0; i < n; i++) { 1020bef36659SShri Abhyankar /* zero rtmp */ 1021bef36659SShri Abhyankar /* L part */ 1022bef36659SShri Abhyankar nz = bi[i + 1] - bi[i]; 1023bef36659SShri Abhyankar bjtmp = bj + bi[i]; 1024*48a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2)); 1025bef36659SShri Abhyankar 1026bef36659SShri Abhyankar /* U part */ 102753cca76cSShri Abhyankar nz = bdiag[i] - bdiag[i + 1]; 102853cca76cSShri Abhyankar bjtmp = bj + bdiag[i + 1] + 1; 1029*48a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2)); 103053cca76cSShri Abhyankar 103153cca76cSShri Abhyankar /* load in initial (unfactored row) */ 103253cca76cSShri Abhyankar nz = ai[i + 1] - ai[i]; 103353cca76cSShri Abhyankar ajtmp = aj + ai[i]; 103453cca76cSShri Abhyankar v = aa + bs2 * ai[i]; 1035*48a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(rtmp + bs2 * ajtmp[j], v + bs2 * j, bs2)); 103653cca76cSShri Abhyankar 103753cca76cSShri Abhyankar /* elimination */ 103853cca76cSShri Abhyankar bjtmp = bj + bi[i]; 103953cca76cSShri Abhyankar nzL = bi[i + 1] - bi[i]; 104053cca76cSShri Abhyankar for (k = 0; k < nzL; k++) { 104153cca76cSShri Abhyankar row = bjtmp[k]; 104253cca76cSShri Abhyankar pc = rtmp + bs2 * row; 1043c35f09e5SBarry Smith for (flg = 0, j = 0; j < bs2; j++) { 1044c35f09e5SBarry Smith if (pc[j] != 0.0) { 1045c35f09e5SBarry Smith flg = 1; 1046c35f09e5SBarry Smith break; 1047c35f09e5SBarry Smith } 1048c35f09e5SBarry Smith } 104953cca76cSShri Abhyankar if (flg) { 105053cca76cSShri Abhyankar pv = b->a + bs2 * bdiag[row]; 105196b95a6bSBarry Smith /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */ 10529566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_A_times_B_7(pc, pv, mwork)); 105353cca76cSShri Abhyankar 1054a5b23f4aSJose E. Roman pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 105553cca76cSShri Abhyankar pv = b->a + bs2 * (bdiag[row + 1] + 1); 105653cca76cSShri Abhyankar nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries inU(row,:), excluding diag */ 105753cca76cSShri Abhyankar for (j = 0; j < nz; j++) { 105896b95a6bSBarry Smith /* PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */ 105953cca76cSShri Abhyankar /* rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */ 106053cca76cSShri Abhyankar v = rtmp + bs2 * pj[j]; 10619566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_A_minus_B_times_C_7(v, pc, pv)); 106253cca76cSShri Abhyankar pv += bs2; 106353cca76cSShri Abhyankar } 10649566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(686.0 * nz + 637)); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ 106553cca76cSShri Abhyankar } 106653cca76cSShri Abhyankar } 106753cca76cSShri Abhyankar 106853cca76cSShri Abhyankar /* finished row so stick it into b->a */ 106953cca76cSShri Abhyankar /* L part */ 107053cca76cSShri Abhyankar pv = b->a + bs2 * bi[i]; 107153cca76cSShri Abhyankar pj = b->j + bi[i]; 107253cca76cSShri Abhyankar nz = bi[i + 1] - bi[i]; 1073*48a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2)); 107453cca76cSShri Abhyankar 1075a5b23f4aSJose E. Roman /* Mark diagonal and invert diagonal for simpler triangular solves */ 107653cca76cSShri Abhyankar pv = b->a + bs2 * bdiag[i]; 107753cca76cSShri Abhyankar pj = b->j + bdiag[i]; 10789566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(pv, rtmp + bs2 * pj[0], bs2)); 10799566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_7(pv, shift, allowzeropivot, &zeropivotdetected)); 10807b6c816cSBarry Smith if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 108153cca76cSShri Abhyankar 108253cca76cSShri Abhyankar /* U part */ 108353cca76cSShri Abhyankar pv = b->a + bs2 * (bdiag[i + 1] + 1); 108453cca76cSShri Abhyankar pj = b->j + bdiag[i + 1] + 1; 108553cca76cSShri Abhyankar nz = bdiag[i] - bdiag[i + 1] - 1; 1086*48a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2)); 108753cca76cSShri Abhyankar } 10889566063dSJacob Faibussowitsch PetscCall(PetscFree2(rtmp, mwork)); 108926fbe8dcSKarl Rupp 10904dd39f65SShri Abhyankar C->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering; 10914dd39f65SShri Abhyankar C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7_NaturalOrdering; 109253cca76cSShri Abhyankar C->assembled = PETSC_TRUE; 109326fbe8dcSKarl Rupp 10949566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.333333333333 * 7 * 7 * 7 * n)); /* from inverting diagonal blocks */ 109553cca76cSShri Abhyankar PetscFunctionReturn(0); 109653cca76cSShri Abhyankar } 1097