1 2 #include <../src/mat/impls/sbaij/seq/sbaij.h> 3 #include <petsc/private/kernels/blockinvert.h> 4 5 /* Version for when blocks are 6 by 6 */ 6 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat C, Mat A, const MatFactorInfo *info) 7 { 8 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 9 IS perm = b->row; 10 const PetscInt *ai, *aj, *perm_ptr, mbs = a->mbs, *bi = b->i, *bj = b->j; 11 PetscInt i, j, *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 12 MatScalar *ba = b->a, *aa, *ap, *dk, *uik; 13 MatScalar *u, *d, *w, *wp, u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12; 14 MatScalar u13, u14, u15, u16, u17, u18, u19, u20, u21, u22, u23, u24, u25, u26, u27; 15 MatScalar u28, u29, u30, u31, u32, u33, u34, u35; 16 PetscReal shift = info->shiftamount; 17 PetscBool allowzeropivot, zeropivotdetected; 18 19 PetscFunctionBegin; 20 /* initialization */ 21 allowzeropivot = PetscNot(A->erroriffailure); 22 PetscCall(PetscCalloc1(36 * mbs, &w)); 23 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 24 il[0] = 0; 25 for (i = 0; i < mbs; i++) jl[i] = mbs; 26 27 PetscCall(PetscMalloc2(36, &dk, 36, &uik)); 28 PetscCall(ISGetIndices(perm, &perm_ptr)); 29 30 /* check permutation */ 31 if (!a->permute) { 32 ai = a->i; 33 aj = a->j; 34 aa = a->a; 35 } else { 36 ai = a->inew; 37 aj = a->jnew; 38 PetscCall(PetscMalloc1(36 * ai[mbs], &aa)); 39 PetscCall(PetscArraycpy(aa, a->a, 36 * ai[mbs])); 40 PetscCall(PetscMalloc1(ai[mbs], &a2anew)); 41 PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs])); 42 43 for (i = 0; i < mbs; i++) { 44 jmin = ai[i]; 45 jmax = ai[i + 1]; 46 for (j = jmin; j < jmax; j++) { 47 while (a2anew[j] != j) { 48 k = a2anew[j]; 49 a2anew[j] = a2anew[k]; 50 a2anew[k] = k; 51 for (k1 = 0; k1 < 36; k1++) { 52 dk[k1] = aa[k * 36 + k1]; 53 aa[k * 36 + k1] = aa[j * 36 + k1]; 54 aa[j * 36 + k1] = dk[k1]; 55 } 56 } 57 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 58 if (i > aj[j]) { 59 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 60 ap = aa + j * 36; /* ptr to the beginning of j-th block of aa */ 61 for (k = 0; k < 36; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 62 for (k = 0; k < 6; k++) { /* j-th block of aa <- dk^T */ 63 for (k1 = 0; k1 < 6; k1++) *ap++ = dk[k + 6 * k1]; 64 } 65 } 66 } 67 } 68 PetscCall(PetscFree(a2anew)); 69 } 70 71 /* for each row k */ 72 for (k = 0; k < mbs; k++) { 73 /*initialize k-th row with elements nonzero in row perm(k) of A */ 74 jmin = ai[perm_ptr[k]]; 75 jmax = ai[perm_ptr[k] + 1]; 76 if (jmin < jmax) { 77 ap = aa + jmin * 36; 78 for (j = jmin; j < jmax; j++) { 79 vj = perm_ptr[aj[j]]; /* block col. index */ 80 wp = w + vj * 36; 81 for (i = 0; i < 36; i++) *wp++ = *ap++; 82 } 83 } 84 85 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 86 PetscCall(PetscArraycpy(dk, w + k * 36, 36)); 87 i = jl[k]; /* first row to be added to k_th row */ 88 89 while (i < mbs) { 90 nexti = jl[i]; /* next row to be added to k_th row */ 91 92 /* compute multiplier */ 93 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 94 95 /* uik = -inv(Di)*U_bar(i,k) */ 96 d = ba + i * 36; 97 u = ba + ili * 36; 98 99 u0 = u[0]; 100 u1 = u[1]; 101 u2 = u[2]; 102 u3 = u[3]; 103 u4 = u[4]; 104 u5 = u[5]; 105 u6 = u[6]; 106 u7 = u[7]; 107 u8 = u[8]; 108 u9 = u[9]; 109 u10 = u[10]; 110 u11 = u[11]; 111 u12 = u[12]; 112 u13 = u[13]; 113 u14 = u[14]; 114 u15 = u[15]; 115 u16 = u[16]; 116 u17 = u[17]; 117 u18 = u[18]; 118 u19 = u[19]; 119 u20 = u[20]; 120 u21 = u[21]; 121 u22 = u[22]; 122 u23 = u[23]; 123 u24 = u[24]; 124 u25 = u[25]; 125 u26 = u[26]; 126 u27 = u[27]; 127 u28 = u[28]; 128 u29 = u[29]; 129 u30 = u[30]; 130 u31 = u[31]; 131 u32 = u[32]; 132 u33 = u[33]; 133 u34 = u[34]; 134 u35 = u[35]; 135 136 uik[0] = -(d[0] * u0 + d[6] * u1 + d[12] * u2 + d[18] * u3 + d[24] * u4 + d[30] * u5); 137 uik[1] = -(d[1] * u0 + d[7] * u1 + d[13] * u2 + d[19] * u3 + d[25] * u4 + d[31] * u5); 138 uik[2] = -(d[2] * u0 + d[8] * u1 + d[14] * u2 + d[20] * u3 + d[26] * u4 + d[32] * u5); 139 uik[3] = -(d[3] * u0 + d[9] * u1 + d[15] * u2 + d[21] * u3 + d[27] * u4 + d[33] * u5); 140 uik[4] = -(d[4] * u0 + d[10] * u1 + d[16] * u2 + d[22] * u3 + d[28] * u4 + d[34] * u5); 141 uik[5] = -(d[5] * u0 + d[11] * u1 + d[17] * u2 + d[23] * u3 + d[29] * u4 + d[35] * u5); 142 143 uik[6] = -(d[0] * u6 + d[6] * u7 + d[12] * u8 + d[18] * u9 + d[24] * u10 + d[30] * u11); 144 uik[7] = -(d[1] * u6 + d[7] * u7 + d[13] * u8 + d[19] * u9 + d[25] * u10 + d[31] * u11); 145 uik[8] = -(d[2] * u6 + d[8] * u7 + d[14] * u8 + d[20] * u9 + d[26] * u10 + d[32] * u11); 146 uik[9] = -(d[3] * u6 + d[9] * u7 + d[15] * u8 + d[21] * u9 + d[27] * u10 + d[33] * u11); 147 uik[10] = -(d[4] * u6 + d[10] * u7 + d[16] * u8 + d[22] * u9 + d[28] * u10 + d[34] * u11); 148 uik[11] = -(d[5] * u6 + d[11] * u7 + d[17] * u8 + d[23] * u9 + d[29] * u10 + d[35] * u11); 149 150 uik[12] = -(d[0] * u12 + d[6] * u13 + d[12] * u14 + d[18] * u15 + d[24] * u16 + d[30] * u17); 151 uik[13] = -(d[1] * u12 + d[7] * u13 + d[13] * u14 + d[19] * u15 + d[25] * u16 + d[31] * u17); 152 uik[14] = -(d[2] * u12 + d[8] * u13 + d[14] * u14 + d[20] * u15 + d[26] * u16 + d[32] * u17); 153 uik[15] = -(d[3] * u12 + d[9] * u13 + d[15] * u14 + d[21] * u15 + d[27] * u16 + d[33] * u17); 154 uik[16] = -(d[4] * u12 + d[10] * u13 + d[16] * u14 + d[22] * u15 + d[28] * u16 + d[34] * u17); 155 uik[17] = -(d[5] * u12 + d[11] * u13 + d[17] * u14 + d[23] * u15 + d[29] * u16 + d[35] * u17); 156 157 uik[18] = -(d[0] * u18 + d[6] * u19 + d[12] * u20 + d[18] * u21 + d[24] * u22 + d[30] * u23); 158 uik[19] = -(d[1] * u18 + d[7] * u19 + d[13] * u20 + d[19] * u21 + d[25] * u22 + d[31] * u23); 159 uik[20] = -(d[2] * u18 + d[8] * u19 + d[14] * u20 + d[20] * u21 + d[26] * u22 + d[32] * u23); 160 uik[21] = -(d[3] * u18 + d[9] * u19 + d[15] * u20 + d[21] * u21 + d[27] * u22 + d[33] * u23); 161 uik[22] = -(d[4] * u18 + d[10] * u19 + d[16] * u20 + d[22] * u21 + d[28] * u22 + d[34] * u23); 162 uik[23] = -(d[5] * u18 + d[11] * u19 + d[17] * u20 + d[23] * u21 + d[29] * u22 + d[35] * u23); 163 164 uik[24] = -(d[0] * u24 + d[6] * u25 + d[12] * u26 + d[18] * u27 + d[24] * u28 + d[30] * u29); 165 uik[25] = -(d[1] * u24 + d[7] * u25 + d[13] * u26 + d[19] * u27 + d[25] * u28 + d[31] * u29); 166 uik[26] = -(d[2] * u24 + d[8] * u25 + d[14] * u26 + d[20] * u27 + d[26] * u28 + d[32] * u29); 167 uik[27] = -(d[3] * u24 + d[9] * u25 + d[15] * u26 + d[21] * u27 + d[27] * u28 + d[33] * u29); 168 uik[28] = -(d[4] * u24 + d[10] * u25 + d[16] * u26 + d[22] * u27 + d[28] * u28 + d[34] * u29); 169 uik[29] = -(d[5] * u24 + d[11] * u25 + d[17] * u26 + d[23] * u27 + d[29] * u28 + d[35] * u29); 170 171 uik[30] = -(d[0] * u30 + d[6] * u31 + d[12] * u32 + d[18] * u33 + d[24] * u34 + d[30] * u35); 172 uik[31] = -(d[1] * u30 + d[7] * u31 + d[13] * u32 + d[19] * u33 + d[25] * u34 + d[31] * u35); 173 uik[32] = -(d[2] * u30 + d[8] * u31 + d[14] * u32 + d[20] * u33 + d[26] * u34 + d[32] * u35); 174 uik[33] = -(d[3] * u30 + d[9] * u31 + d[15] * u32 + d[21] * u33 + d[27] * u34 + d[33] * u35); 175 uik[34] = -(d[4] * u30 + d[10] * u31 + d[16] * u32 + d[22] * u33 + d[28] * u34 + d[34] * u35); 176 uik[35] = -(d[5] * u30 + d[11] * u31 + d[17] * u32 + d[23] * u33 + d[29] * u34 + d[35] * u35); 177 178 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 179 dk[0] += uik[0] * u0 + uik[1] * u1 + uik[2] * u2 + uik[3] * u3 + uik[4] * u4 + uik[5] * u5; 180 dk[1] += uik[6] * u0 + uik[7] * u1 + uik[8] * u2 + uik[9] * u3 + uik[10] * u4 + uik[11] * u5; 181 dk[2] += uik[12] * u0 + uik[13] * u1 + uik[14] * u2 + uik[15] * u3 + uik[16] * u4 + uik[17] * u5; 182 dk[3] += uik[18] * u0 + uik[19] * u1 + uik[20] * u2 + uik[21] * u3 + uik[22] * u4 + uik[23] * u5; 183 dk[4] += uik[24] * u0 + uik[25] * u1 + uik[26] * u2 + uik[27] * u3 + uik[28] * u4 + uik[29] * u5; 184 dk[5] += uik[30] * u0 + uik[31] * u1 + uik[32] * u2 + uik[33] * u3 + uik[34] * u4 + uik[35] * u5; 185 186 dk[6] += uik[0] * u6 + uik[1] * u7 + uik[2] * u8 + uik[3] * u9 + uik[4] * u10 + uik[5] * u11; 187 dk[7] += uik[6] * u6 + uik[7] * u7 + uik[8] * u8 + uik[9] * u9 + uik[10] * u10 + uik[11] * u11; 188 dk[8] += uik[12] * u6 + uik[13] * u7 + uik[14] * u8 + uik[15] * u9 + uik[16] * u10 + uik[17] * u11; 189 dk[9] += uik[18] * u6 + uik[19] * u7 + uik[20] * u8 + uik[21] * u9 + uik[22] * u10 + uik[23] * u11; 190 dk[10] += uik[24] * u6 + uik[25] * u7 + uik[26] * u8 + uik[27] * u9 + uik[28] * u10 + uik[29] * u11; 191 dk[11] += uik[30] * u6 + uik[31] * u7 + uik[32] * u8 + uik[33] * u9 + uik[34] * u10 + uik[35] * u11; 192 193 dk[12] += uik[0] * u12 + uik[1] * u13 + uik[2] * u14 + uik[3] * u15 + uik[4] * u16 + uik[5] * u17; 194 dk[13] += uik[6] * u12 + uik[7] * u13 + uik[8] * u14 + uik[9] * u15 + uik[10] * u16 + uik[11] * u17; 195 dk[14] += uik[12] * u12 + uik[13] * u13 + uik[14] * u14 + uik[15] * u15 + uik[16] * u16 + uik[17] * u17; 196 dk[15] += uik[18] * u12 + uik[19] * u13 + uik[20] * u14 + uik[21] * u15 + uik[22] * u16 + uik[23] * u17; 197 dk[16] += uik[24] * u12 + uik[25] * u13 + uik[26] * u14 + uik[27] * u15 + uik[28] * u16 + uik[29] * u17; 198 dk[17] += uik[30] * u12 + uik[31] * u13 + uik[32] * u14 + uik[33] * u15 + uik[34] * u16 + uik[35] * u17; 199 200 dk[18] += uik[0] * u18 + uik[1] * u19 + uik[2] * u20 + uik[3] * u21 + uik[4] * u22 + uik[5] * u23; 201 dk[19] += uik[6] * u18 + uik[7] * u19 + uik[8] * u20 + uik[9] * u21 + uik[10] * u22 + uik[11] * u23; 202 dk[20] += uik[12] * u18 + uik[13] * u19 + uik[14] * u20 + uik[15] * u21 + uik[16] * u22 + uik[17] * u23; 203 dk[21] += uik[18] * u18 + uik[19] * u19 + uik[20] * u20 + uik[21] * u21 + uik[22] * u22 + uik[23] * u23; 204 dk[22] += uik[24] * u18 + uik[25] * u19 + uik[26] * u20 + uik[27] * u21 + uik[28] * u22 + uik[29] * u23; 205 dk[23] += uik[30] * u18 + uik[31] * u19 + uik[32] * u20 + uik[33] * u21 + uik[34] * u22 + uik[35] * u23; 206 207 dk[24] += uik[0] * u24 + uik[1] * u25 + uik[2] * u26 + uik[3] * u27 + uik[4] * u28 + uik[5] * u29; 208 dk[25] += uik[6] * u24 + uik[7] * u25 + uik[8] * u26 + uik[9] * u27 + uik[10] * u28 + uik[11] * u29; 209 dk[26] += uik[12] * u24 + uik[13] * u25 + uik[14] * u26 + uik[15] * u27 + uik[16] * u28 + uik[17] * u29; 210 dk[27] += uik[18] * u24 + uik[19] * u25 + uik[20] * u26 + uik[21] * u27 + uik[22] * u28 + uik[23] * u29; 211 dk[28] += uik[24] * u24 + uik[25] * u25 + uik[26] * u26 + uik[27] * u27 + uik[28] * u28 + uik[29] * u29; 212 dk[29] += uik[30] * u24 + uik[31] * u25 + uik[32] * u26 + uik[33] * u27 + uik[34] * u28 + uik[35] * u29; 213 214 dk[30] += uik[0] * u30 + uik[1] * u31 + uik[2] * u32 + uik[3] * u33 + uik[4] * u34 + uik[5] * u35; 215 dk[31] += uik[6] * u30 + uik[7] * u31 + uik[8] * u32 + uik[9] * u33 + uik[10] * u34 + uik[11] * u35; 216 dk[32] += uik[12] * u30 + uik[13] * u31 + uik[14] * u32 + uik[15] * u33 + uik[16] * u34 + uik[17] * u35; 217 dk[33] += uik[18] * u30 + uik[19] * u31 + uik[20] * u32 + uik[21] * u33 + uik[22] * u34 + uik[23] * u35; 218 dk[34] += uik[24] * u30 + uik[25] * u31 + uik[26] * u32 + uik[27] * u33 + uik[28] * u34 + uik[29] * u35; 219 dk[35] += uik[30] * u30 + uik[31] * u31 + uik[32] * u32 + uik[33] * u33 + uik[34] * u34 + uik[35] * u35; 220 221 PetscCall(PetscLogFlops(216.0 * 4.0)); 222 223 /* update -U(i,k) */ 224 PetscCall(PetscArraycpy(ba + ili * 36, uik, 36)); 225 226 /* add multiple of row i to k-th row ... */ 227 jmin = ili + 1; 228 jmax = bi[i + 1]; 229 if (jmin < jmax) { 230 for (j = jmin; j < jmax; j++) { 231 /* w += -U(i,k)^T * U_bar(i,j) */ 232 wp = w + bj[j] * 36; 233 u = ba + j * 36; 234 235 u0 = u[0]; 236 u1 = u[1]; 237 u2 = u[2]; 238 u3 = u[3]; 239 u4 = u[4]; 240 u5 = u[5]; 241 u6 = u[6]; 242 u7 = u[7]; 243 u8 = u[8]; 244 u9 = u[9]; 245 u10 = u[10]; 246 u11 = u[11]; 247 u12 = u[12]; 248 u13 = u[13]; 249 u14 = u[14]; 250 u15 = u[15]; 251 u16 = u[16]; 252 u17 = u[17]; 253 u18 = u[18]; 254 u19 = u[19]; 255 u20 = u[20]; 256 u21 = u[21]; 257 u22 = u[22]; 258 u23 = u[23]; 259 u24 = u[24]; 260 u25 = u[25]; 261 u26 = u[26]; 262 u27 = u[27]; 263 u28 = u[28]; 264 u29 = u[29]; 265 u30 = u[30]; 266 u31 = u[31]; 267 u32 = u[32]; 268 u33 = u[33]; 269 u34 = u[34]; 270 u35 = u[35]; 271 272 wp[0] += uik[0] * u0 + uik[1] * u1 + uik[2] * u2 + uik[3] * u3 + uik[4] * u4 + uik[5] * u5; 273 wp[1] += uik[6] * u0 + uik[7] * u1 + uik[8] * u2 + uik[9] * u3 + uik[10] * u4 + uik[11] * u5; 274 wp[2] += uik[12] * u0 + uik[13] * u1 + uik[14] * u2 + uik[15] * u3 + uik[16] * u4 + uik[17] * u5; 275 wp[3] += uik[18] * u0 + uik[19] * u1 + uik[20] * u2 + uik[21] * u3 + uik[22] * u4 + uik[23] * u5; 276 wp[4] += uik[24] * u0 + uik[25] * u1 + uik[26] * u2 + uik[27] * u3 + uik[28] * u4 + uik[29] * u5; 277 wp[5] += uik[30] * u0 + uik[31] * u1 + uik[32] * u2 + uik[33] * u3 + uik[34] * u4 + uik[35] * u5; 278 279 wp[6] += uik[0] * u6 + uik[1] * u7 + uik[2] * u8 + uik[3] * u9 + uik[4] * u10 + uik[5] * u11; 280 wp[7] += uik[6] * u6 + uik[7] * u7 + uik[8] * u8 + uik[9] * u9 + uik[10] * u10 + uik[11] * u11; 281 wp[8] += uik[12] * u6 + uik[13] * u7 + uik[14] * u8 + uik[15] * u9 + uik[16] * u10 + uik[17] * u11; 282 wp[9] += uik[18] * u6 + uik[19] * u7 + uik[20] * u8 + uik[21] * u9 + uik[22] * u10 + uik[23] * u11; 283 wp[10] += uik[24] * u6 + uik[25] * u7 + uik[26] * u8 + uik[27] * u9 + uik[28] * u10 + uik[29] * u11; 284 wp[11] += uik[30] * u6 + uik[31] * u7 + uik[32] * u8 + uik[33] * u9 + uik[34] * u10 + uik[35] * u11; 285 286 wp[12] += uik[0] * u12 + uik[1] * u13 + uik[2] * u14 + uik[3] * u15 + uik[4] * u16 + uik[5] * u17; 287 wp[13] += uik[6] * u12 + uik[7] * u13 + uik[8] * u14 + uik[9] * u15 + uik[10] * u16 + uik[11] * u17; 288 wp[14] += uik[12] * u12 + uik[13] * u13 + uik[14] * u14 + uik[15] * u15 + uik[16] * u16 + uik[17] * u17; 289 wp[15] += uik[18] * u12 + uik[19] * u13 + uik[20] * u14 + uik[21] * u15 + uik[22] * u16 + uik[23] * u17; 290 wp[16] += uik[24] * u12 + uik[25] * u13 + uik[26] * u14 + uik[27] * u15 + uik[28] * u16 + uik[29] * u17; 291 wp[17] += uik[30] * u12 + uik[31] * u13 + uik[32] * u14 + uik[33] * u15 + uik[34] * u16 + uik[35] * u17; 292 293 wp[18] += uik[0] * u18 + uik[1] * u19 + uik[2] * u20 + uik[3] * u21 + uik[4] * u22 + uik[5] * u23; 294 wp[19] += uik[6] * u18 + uik[7] * u19 + uik[8] * u20 + uik[9] * u21 + uik[10] * u22 + uik[11] * u23; 295 wp[20] += uik[12] * u18 + uik[13] * u19 + uik[14] * u20 + uik[15] * u21 + uik[16] * u22 + uik[17] * u23; 296 wp[21] += uik[18] * u18 + uik[19] * u19 + uik[20] * u20 + uik[21] * u21 + uik[22] * u22 + uik[23] * u23; 297 wp[22] += uik[24] * u18 + uik[25] * u19 + uik[26] * u20 + uik[27] * u21 + uik[28] * u22 + uik[29] * u23; 298 wp[23] += uik[30] * u18 + uik[31] * u19 + uik[32] * u20 + uik[33] * u21 + uik[34] * u22 + uik[35] * u23; 299 300 wp[24] += uik[0] * u24 + uik[1] * u25 + uik[2] * u26 + uik[3] * u27 + uik[4] * u28 + uik[5] * u29; 301 wp[25] += uik[6] * u24 + uik[7] * u25 + uik[8] * u26 + uik[9] * u27 + uik[10] * u28 + uik[11] * u29; 302 wp[26] += uik[12] * u24 + uik[13] * u25 + uik[14] * u26 + uik[15] * u27 + uik[16] * u28 + uik[17] * u29; 303 wp[27] += uik[18] * u24 + uik[19] * u25 + uik[20] * u26 + uik[21] * u27 + uik[22] * u28 + uik[23] * u29; 304 wp[28] += uik[24] * u24 + uik[25] * u25 + uik[26] * u26 + uik[27] * u27 + uik[28] * u28 + uik[29] * u29; 305 wp[29] += uik[30] * u24 + uik[31] * u25 + uik[32] * u26 + uik[33] * u27 + uik[34] * u28 + uik[35] * u29; 306 307 wp[30] += uik[0] * u30 + uik[1] * u31 + uik[2] * u32 + uik[3] * u33 + uik[4] * u34 + uik[5] * u35; 308 wp[31] += uik[6] * u30 + uik[7] * u31 + uik[8] * u32 + uik[9] * u33 + uik[10] * u34 + uik[11] * u35; 309 wp[32] += uik[12] * u30 + uik[13] * u31 + uik[14] * u32 + uik[15] * u33 + uik[16] * u34 + uik[17] * u35; 310 wp[33] += uik[18] * u30 + uik[19] * u31 + uik[20] * u32 + uik[21] * u33 + uik[22] * u34 + uik[23] * u35; 311 wp[34] += uik[24] * u30 + uik[25] * u31 + uik[26] * u32 + uik[27] * u33 + uik[28] * u34 + uik[29] * u35; 312 wp[35] += uik[30] * u30 + uik[31] * u31 + uik[32] * u32 + uik[33] * u33 + uik[34] * u34 + uik[35] * u35; 313 } 314 PetscCall(PetscLogFlops(2.0 * 216.0 * (jmax - jmin))); 315 316 /* ... add i to row list for next nonzero entry */ 317 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 318 j = bj[jmin]; 319 jl[i] = jl[j]; 320 jl[j] = i; /* update jl */ 321 } 322 i = nexti; 323 } 324 325 /* save nonzero entries in k-th row of U ... */ 326 327 /* invert diagonal block */ 328 d = ba + k * 36; 329 PetscCall(PetscArraycpy(d, dk, 36)); 330 PetscCall(PetscKernel_A_gets_inverse_A_6(d, shift, allowzeropivot, &zeropivotdetected)); 331 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 332 333 jmin = bi[k]; 334 jmax = bi[k + 1]; 335 if (jmin < jmax) { 336 for (j = jmin; j < jmax; j++) { 337 vj = bj[j]; /* block col. index of U */ 338 u = ba + j * 36; 339 wp = w + vj * 36; 340 for (k1 = 0; k1 < 36; k1++) { 341 *u++ = *wp; 342 *wp++ = 0.0; 343 } 344 } 345 346 /* ... add k to row list for first nonzero entry in k-th row */ 347 il[k] = jmin; 348 i = bj[jmin]; 349 jl[k] = jl[i]; 350 jl[i] = k; 351 } 352 } 353 354 PetscCall(PetscFree(w)); 355 PetscCall(PetscFree2(il, jl)); 356 PetscCall(PetscFree2(dk, uik)); 357 if (a->permute) PetscCall(PetscFree(aa)); 358 359 PetscCall(ISRestoreIndices(perm, &perm_ptr)); 360 361 C->ops->solve = MatSolve_SeqSBAIJ_6_inplace; 362 C->ops->solvetranspose = MatSolve_SeqSBAIJ_6_inplace; 363 C->assembled = PETSC_TRUE; 364 C->preallocated = PETSC_TRUE; 365 366 PetscCall(PetscLogFlops(1.3333 * 216 * b->mbs)); /* from inverting diagonal blocks */ 367 PetscFunctionReturn(0); 368 } 369