1 2 #include <../src/mat/impls/baij/seq/baij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petsc/private/kernels/blockinvert.h> 5 #include <petscis.h> 6 7 PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) { 8 Mat_SeqSBAIJ *fact = (Mat_SeqSBAIJ *)F->data; 9 MatScalar *dd = fact->a; 10 PetscInt mbs = fact->mbs, bs = F->rmap->bs, i, nneg_tmp, npos_tmp, *fi = fact->diag; 11 12 PetscFunctionBegin; 13 PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for bs: %" PetscInt_FMT " >1 yet", bs); 14 15 nneg_tmp = 0; 16 npos_tmp = 0; 17 for (i = 0; i < mbs; i++) { 18 if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++; 19 else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++; 20 fi++; 21 } 22 if (nneg) *nneg = nneg_tmp; 23 if (npos) *npos = npos_tmp; 24 if (nzero) *nzero = mbs - nneg_tmp - npos_tmp; 25 PetscFunctionReturn(0); 26 } 27 28 /* 29 Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 30 Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad. 31 */ 32 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F, Mat A, IS perm, const MatFactorInfo *info) { 33 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b; 34 const PetscInt *rip, *ai, *aj; 35 PetscInt i, mbs = a->mbs, *jutmp, bs = A->rmap->bs, bs2 = a->bs2; 36 PetscInt m, reallocs = 0, prow; 37 PetscInt *jl, *q, jmin, jmax, juidx, nzk, qm, *iu, *ju, k, j, vj, umax, maxadd; 38 PetscReal f = info->fill; 39 PetscBool perm_identity; 40 41 PetscFunctionBegin; 42 /* check whether perm is the identity mapping */ 43 PetscCall(ISIdentity(perm, &perm_identity)); 44 PetscCall(ISGetIndices(perm, &rip)); 45 46 if (perm_identity) { /* without permutation */ 47 a->permute = PETSC_FALSE; 48 49 ai = a->i; 50 aj = a->j; 51 } else { /* non-trivial permutation */ 52 a->permute = PETSC_TRUE; 53 54 PetscCall(MatReorderingSeqSBAIJ(A, perm)); 55 56 ai = a->inew; 57 aj = a->jnew; 58 } 59 60 /* initialization */ 61 PetscCall(PetscMalloc1(mbs + 1, &iu)); 62 umax = (PetscInt)(f * ai[mbs] + 1); 63 umax += mbs + 1; 64 PetscCall(PetscMalloc1(umax, &ju)); 65 iu[0] = mbs + 1; 66 juidx = mbs + 1; /* index for ju */ 67 /* jl linked list for pivot row -- linked list for col index */ 68 PetscCall(PetscMalloc2(mbs, &jl, mbs, &q)); 69 for (i = 0; i < mbs; i++) { 70 jl[i] = mbs; 71 q[i] = 0; 72 } 73 74 /* for each row k */ 75 for (k = 0; k < mbs; k++) { 76 for (i = 0; i < mbs; i++) q[i] = 0; /* to be removed! */ 77 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 78 q[k] = mbs; 79 /* initialize nonzero structure of k-th row to row rip[k] of A */ 80 jmin = ai[rip[k]] + 1; /* exclude diag[k] */ 81 jmax = ai[rip[k] + 1]; 82 for (j = jmin; j < jmax; j++) { 83 vj = rip[aj[j]]; /* col. value */ 84 if (vj > k) { 85 qm = k; 86 do { 87 m = qm; 88 qm = q[m]; 89 } while (qm < vj); 90 PetscCheck(qm != vj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Duplicate entry in A"); 91 nzk++; 92 q[m] = vj; 93 q[vj] = qm; 94 } /* if (vj > k) */ 95 } /* for (j=jmin; j<jmax; j++) */ 96 97 /* modify nonzero structure of k-th row by computing fill-in 98 for each row i to be merged in */ 99 prow = k; 100 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 101 102 while (prow < k) { 103 /* merge row prow into k-th row */ 104 jmin = iu[prow] + 1; 105 jmax = iu[prow + 1]; 106 qm = k; 107 for (j = jmin; j < jmax; j++) { 108 vj = ju[j]; 109 do { 110 m = qm; 111 qm = q[m]; 112 } while (qm < vj); 113 if (qm != vj) { 114 nzk++; 115 q[m] = vj; 116 q[vj] = qm; 117 qm = vj; 118 } 119 } 120 prow = jl[prow]; /* next pivot row */ 121 } 122 123 /* add k to row list for first nonzero element in k-th row */ 124 if (nzk > 0) { 125 i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 126 jl[k] = jl[i]; 127 jl[i] = k; 128 } 129 iu[k + 1] = iu[k] + nzk; 130 131 /* allocate more space to ju if needed */ 132 if (iu[k + 1] > umax) { 133 /* estimate how much additional space we will need */ 134 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 135 /* just double the memory each time */ 136 maxadd = umax; 137 if (maxadd < nzk) maxadd = (mbs - k) * (nzk + 1) / 2; 138 umax += maxadd; 139 140 /* allocate a longer ju */ 141 PetscCall(PetscMalloc1(umax, &jutmp)); 142 PetscCall(PetscArraycpy(jutmp, ju, iu[k])); 143 PetscCall(PetscFree(ju)); 144 ju = jutmp; 145 reallocs++; /* count how many times we realloc */ 146 } 147 148 /* save nonzero structure of k-th row in ju */ 149 i = k; 150 while (nzk--) { 151 i = q[i]; 152 ju[juidx++] = i; 153 } 154 } 155 156 #if defined(PETSC_USE_INFO) 157 if (ai[mbs] != 0) { 158 PetscReal af = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]); 159 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 160 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 161 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 162 PetscCall(PetscInfo(A, "for best performance.\n")); 163 } else { 164 PetscCall(PetscInfo(A, "Empty matrix\n")); 165 } 166 #endif 167 168 PetscCall(ISRestoreIndices(perm, &rip)); 169 PetscCall(PetscFree2(jl, q)); 170 171 /* put together the new matrix */ 172 PetscCall(MatSeqSBAIJSetPreallocation(F, bs, MAT_SKIP_ALLOCATION, NULL)); 173 174 b = (Mat_SeqSBAIJ *)(F)->data; 175 b->singlemalloc = PETSC_FALSE; 176 b->free_a = PETSC_TRUE; 177 b->free_ij = PETSC_TRUE; 178 179 PetscCall(PetscMalloc1((iu[mbs] + 1) * bs2, &b->a)); 180 b->j = ju; 181 b->i = iu; 182 b->diag = NULL; 183 b->ilen = NULL; 184 b->imax = NULL; 185 b->row = perm; 186 187 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 188 189 PetscCall(PetscObjectReference((PetscObject)perm)); 190 191 b->icol = perm; 192 PetscCall(PetscObjectReference((PetscObject)perm)); 193 PetscCall(PetscMalloc1(bs * mbs + bs, &b->solve_work)); 194 /* In b structure: Free imax, ilen, old a, old j. 195 Allocate idnew, solve_work, new a, new j */ 196 b->maxnz = b->nz = iu[mbs]; 197 198 (F)->info.factor_mallocs = reallocs; 199 (F)->info.fill_ratio_given = f; 200 if (ai[mbs] != 0) { 201 (F)->info.fill_ratio_needed = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]); 202 } else { 203 (F)->info.fill_ratio_needed = 0.0; 204 } 205 PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(F, perm_identity)); 206 PetscFunctionReturn(0); 207 } 208 /* 209 Symbolic U^T*D*U factorization for SBAIJ format. 210 See MatICCFactorSymbolic_SeqAIJ() for description of its data structure. 211 */ 212 #include <petscbt.h> 213 #include <../src/mat/utils/freespace.h> 214 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) { 215 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 216 Mat_SeqSBAIJ *b; 217 PetscBool perm_identity, missing; 218 PetscReal fill = info->fill; 219 const PetscInt *rip, *ai = a->i, *aj = a->j; 220 PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow; 221 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 222 PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr, *udiag; 223 PetscFreeSpaceList free_space = NULL, current_space = NULL; 224 PetscBT lnkbt; 225 226 PetscFunctionBegin; 227 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 228 PetscCall(MatMissingDiagonal(A, &missing, &i)); 229 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 230 if (bs > 1) { 231 PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact, A, perm, info)); 232 PetscFunctionReturn(0); 233 } 234 235 /* check whether perm is the identity mapping */ 236 PetscCall(ISIdentity(perm, &perm_identity)); 237 PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format"); 238 a->permute = PETSC_FALSE; 239 PetscCall(ISGetIndices(perm, &rip)); 240 241 /* initialization */ 242 PetscCall(PetscMalloc1(mbs + 1, &ui)); 243 PetscCall(PetscMalloc1(mbs + 1, &udiag)); 244 ui[0] = 0; 245 246 /* jl: linked list for storing indices of the pivot rows 247 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 248 PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols)); 249 for (i = 0; i < mbs; i++) { 250 jl[i] = mbs; 251 il[i] = 0; 252 } 253 254 /* create and initialize a linked list for storing column indices of the active row k */ 255 nlnk = mbs + 1; 256 PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt)); 257 258 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 259 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space)); 260 current_space = free_space; 261 262 for (k = 0; k < mbs; k++) { /* for each active row k */ 263 /* initialize lnk by the column indices of row rip[k] of A */ 264 nzk = 0; 265 ncols = ai[k + 1] - ai[k]; 266 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row %" PetscInt_FMT " in matrix ", k); 267 for (j = 0; j < ncols; j++) { 268 i = *(aj + ai[k] + j); 269 cols[j] = i; 270 } 271 PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt)); 272 nzk += nlnk; 273 274 /* update lnk by computing fill-in for each pivot row to be merged in */ 275 prow = jl[k]; /* 1st pivot row */ 276 277 while (prow < k) { 278 nextprow = jl[prow]; 279 /* merge prow into k-th row */ 280 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 281 jmax = ui[prow + 1]; 282 ncols = jmax - jmin; 283 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 284 PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt)); 285 nzk += nlnk; 286 287 /* update il and jl for prow */ 288 if (jmin < jmax) { 289 il[prow] = jmin; 290 j = *uj_ptr; 291 jl[prow] = jl[j]; 292 jl[j] = prow; 293 } 294 prow = nextprow; 295 } 296 297 /* if free space is not available, make more free space */ 298 if (current_space->local_remaining < nzk) { 299 i = mbs - k + 1; /* num of unfactored rows */ 300 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 301 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 302 reallocs++; 303 } 304 305 /* copy data into free space, then initialize lnk */ 306 PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt)); 307 308 /* add the k-th row into il and jl */ 309 if (nzk > 1) { 310 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 311 jl[k] = jl[i]; 312 jl[i] = k; 313 il[k] = ui[k] + 1; 314 } 315 ui_ptr[k] = current_space->array; 316 317 current_space->array += nzk; 318 current_space->local_used += nzk; 319 current_space->local_remaining -= nzk; 320 321 ui[k + 1] = ui[k] + nzk; 322 } 323 324 PetscCall(ISRestoreIndices(perm, &rip)); 325 PetscCall(PetscFree4(ui_ptr, il, jl, cols)); 326 327 /* destroy list of free space and other temporary array(s) */ 328 PetscCall(PetscMalloc1(ui[mbs] + 1, &uj)); 329 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, mbs, ui, udiag)); /* store matrix factor */ 330 PetscCall(PetscLLDestroy(lnk, lnkbt)); 331 332 /* put together the new matrix in MATSEQSBAIJ format */ 333 PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL)); 334 335 b = (Mat_SeqSBAIJ *)fact->data; 336 b->singlemalloc = PETSC_FALSE; 337 b->free_a = PETSC_TRUE; 338 b->free_ij = PETSC_TRUE; 339 340 PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a)); 341 342 b->j = uj; 343 b->i = ui; 344 b->diag = udiag; 345 b->free_diag = PETSC_TRUE; 346 b->ilen = NULL; 347 b->imax = NULL; 348 b->row = perm; 349 b->icol = perm; 350 351 PetscCall(PetscObjectReference((PetscObject)perm)); 352 PetscCall(PetscObjectReference((PetscObject)perm)); 353 354 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 355 356 PetscCall(PetscMalloc1(mbs + 1, &b->solve_work)); 357 358 b->maxnz = b->nz = ui[mbs]; 359 360 fact->info.factor_mallocs = reallocs; 361 fact->info.fill_ratio_given = fill; 362 if (ai[mbs] != 0) { 363 fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs]; 364 } else { 365 fact->info.fill_ratio_needed = 0.0; 366 } 367 #if defined(PETSC_USE_INFO) 368 if (ai[mbs] != 0) { 369 PetscReal af = fact->info.fill_ratio_needed; 370 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 371 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 372 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 373 } else { 374 PetscCall(PetscInfo(A, "Empty matrix\n")); 375 } 376 #endif 377 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 378 PetscFunctionReturn(0); 379 } 380 381 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) { 382 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 383 Mat_SeqSBAIJ *b; 384 PetscBool perm_identity, missing; 385 PetscReal fill = info->fill; 386 const PetscInt *rip, *ai, *aj; 387 PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow, d; 388 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 389 PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr; 390 PetscFreeSpaceList free_space = NULL, current_space = NULL; 391 PetscBT lnkbt; 392 393 PetscFunctionBegin; 394 PetscCall(MatMissingDiagonal(A, &missing, &d)); 395 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 396 397 /* 398 This code originally uses Modified Sparse Row (MSR) storage 399 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choise! 400 Then it is rewritten so the factor B takes seqsbaij format. However the associated 401 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity, 402 thus the original code in MSR format is still used for these cases. 403 The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever 404 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 405 */ 406 if (bs > 1) { 407 PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact, A, perm, info)); 408 PetscFunctionReturn(0); 409 } 410 411 /* check whether perm is the identity mapping */ 412 PetscCall(ISIdentity(perm, &perm_identity)); 413 PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format"); 414 a->permute = PETSC_FALSE; 415 ai = a->i; 416 aj = a->j; 417 PetscCall(ISGetIndices(perm, &rip)); 418 419 /* initialization */ 420 PetscCall(PetscMalloc1(mbs + 1, &ui)); 421 ui[0] = 0; 422 423 /* jl: linked list for storing indices of the pivot rows 424 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 425 PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols)); 426 for (i = 0; i < mbs; i++) { 427 jl[i] = mbs; 428 il[i] = 0; 429 } 430 431 /* create and initialize a linked list for storing column indices of the active row k */ 432 nlnk = mbs + 1; 433 PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt)); 434 435 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 436 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space)); 437 current_space = free_space; 438 439 for (k = 0; k < mbs; k++) { /* for each active row k */ 440 /* initialize lnk by the column indices of row rip[k] of A */ 441 nzk = 0; 442 ncols = ai[rip[k] + 1] - ai[rip[k]]; 443 for (j = 0; j < ncols; j++) { 444 i = *(aj + ai[rip[k]] + j); 445 cols[j] = rip[i]; 446 } 447 PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt)); 448 nzk += nlnk; 449 450 /* update lnk by computing fill-in for each pivot row to be merged in */ 451 prow = jl[k]; /* 1st pivot row */ 452 453 while (prow < k) { 454 nextprow = jl[prow]; 455 /* merge prow into k-th row */ 456 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 457 jmax = ui[prow + 1]; 458 ncols = jmax - jmin; 459 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 460 PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt)); 461 nzk += nlnk; 462 463 /* update il and jl for prow */ 464 if (jmin < jmax) { 465 il[prow] = jmin; 466 467 j = *uj_ptr; 468 jl[prow] = jl[j]; 469 jl[j] = prow; 470 } 471 prow = nextprow; 472 } 473 474 /* if free space is not available, make more free space */ 475 if (current_space->local_remaining < nzk) { 476 i = mbs - k + 1; /* num of unfactored rows */ 477 i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 478 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 479 reallocs++; 480 } 481 482 /* copy data into free space, then initialize lnk */ 483 PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt)); 484 485 /* add the k-th row into il and jl */ 486 if (nzk - 1 > 0) { 487 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 488 jl[k] = jl[i]; 489 jl[i] = k; 490 il[k] = ui[k] + 1; 491 } 492 ui_ptr[k] = current_space->array; 493 494 current_space->array += nzk; 495 current_space->local_used += nzk; 496 current_space->local_remaining -= nzk; 497 498 ui[k + 1] = ui[k] + nzk; 499 } 500 501 PetscCall(ISRestoreIndices(perm, &rip)); 502 PetscCall(PetscFree4(ui_ptr, il, jl, cols)); 503 504 /* destroy list of free space and other temporary array(s) */ 505 PetscCall(PetscMalloc1(ui[mbs] + 1, &uj)); 506 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 507 PetscCall(PetscLLDestroy(lnk, lnkbt)); 508 509 /* put together the new matrix in MATSEQSBAIJ format */ 510 PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL)); 511 512 b = (Mat_SeqSBAIJ *)fact->data; 513 b->singlemalloc = PETSC_FALSE; 514 b->free_a = PETSC_TRUE; 515 b->free_ij = PETSC_TRUE; 516 517 PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a)); 518 519 b->j = uj; 520 b->i = ui; 521 b->diag = NULL; 522 b->ilen = NULL; 523 b->imax = NULL; 524 b->row = perm; 525 526 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 527 528 PetscCall(PetscObjectReference((PetscObject)perm)); 529 b->icol = perm; 530 PetscCall(PetscObjectReference((PetscObject)perm)); 531 PetscCall(PetscMalloc1(mbs + 1, &b->solve_work)); 532 b->maxnz = b->nz = ui[mbs]; 533 534 fact->info.factor_mallocs = reallocs; 535 fact->info.fill_ratio_given = fill; 536 if (ai[mbs] != 0) { 537 fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs]; 538 } else { 539 fact->info.fill_ratio_needed = 0.0; 540 } 541 #if defined(PETSC_USE_INFO) 542 if (ai[mbs] != 0) { 543 PetscReal af = fact->info.fill_ratio_needed; 544 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 545 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 546 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 547 } else { 548 PetscCall(PetscInfo(A, "Empty matrix\n")); 549 } 550 #endif 551 PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(fact, perm_identity)); 552 PetscFunctionReturn(0); 553 } 554 555 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C, Mat A, const MatFactorInfo *info) { 556 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 557 IS perm = b->row; 558 const PetscInt *ai, *aj, *perm_ptr, mbs = a->mbs, *bi = b->i, *bj = b->j; 559 PetscInt i, j; 560 PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 561 PetscInt bs = A->rmap->bs, bs2 = a->bs2; 562 MatScalar *ba = b->a, *aa, *ap, *dk, *uik; 563 MatScalar *u, *diag, *rtmp, *rtmp_ptr; 564 MatScalar *work; 565 PetscInt *pivots; 566 PetscBool allowzeropivot, zeropivotdetected; 567 568 PetscFunctionBegin; 569 /* initialization */ 570 PetscCall(PetscCalloc1(bs2 * mbs, &rtmp)); 571 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 572 allowzeropivot = PetscNot(A->erroriffailure); 573 574 il[0] = 0; 575 for (i = 0; i < mbs; i++) jl[i] = mbs; 576 577 PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work)); 578 PetscCall(PetscMalloc1(bs, &pivots)); 579 580 PetscCall(ISGetIndices(perm, &perm_ptr)); 581 582 /* check permutation */ 583 if (!a->permute) { 584 ai = a->i; 585 aj = a->j; 586 aa = a->a; 587 } else { 588 ai = a->inew; 589 aj = a->jnew; 590 PetscCall(PetscMalloc1(bs2 * ai[mbs], &aa)); 591 PetscCall(PetscArraycpy(aa, a->a, bs2 * ai[mbs])); 592 PetscCall(PetscMalloc1(ai[mbs], &a2anew)); 593 PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs])); 594 595 for (i = 0; i < mbs; i++) { 596 jmin = ai[i]; 597 jmax = ai[i + 1]; 598 for (j = jmin; j < jmax; j++) { 599 while (a2anew[j] != j) { 600 k = a2anew[j]; 601 a2anew[j] = a2anew[k]; 602 a2anew[k] = k; 603 for (k1 = 0; k1 < bs2; k1++) { 604 dk[k1] = aa[k * bs2 + k1]; 605 aa[k * bs2 + k1] = aa[j * bs2 + k1]; 606 aa[j * bs2 + k1] = dk[k1]; 607 } 608 } 609 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 610 if (i > aj[j]) { 611 ap = aa + j * bs2; /* ptr to the beginning of j-th block of aa */ 612 for (k = 0; k < bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 613 for (k = 0; k < bs; k++) { /* j-th block of aa <- dk^T */ 614 for (k1 = 0; k1 < bs; k1++) *ap++ = dk[k + bs * k1]; 615 } 616 } 617 } 618 } 619 PetscCall(PetscFree(a2anew)); 620 } 621 622 /* for each row k */ 623 for (k = 0; k < mbs; k++) { 624 /*initialize k-th row with elements nonzero in row perm(k) of A */ 625 jmin = ai[perm_ptr[k]]; 626 jmax = ai[perm_ptr[k] + 1]; 627 628 ap = aa + jmin * bs2; 629 for (j = jmin; j < jmax; j++) { 630 vj = perm_ptr[aj[j]]; /* block col. index */ 631 rtmp_ptr = rtmp + vj * bs2; 632 for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++; 633 } 634 635 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 636 PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2)); 637 i = jl[k]; /* first row to be added to k_th row */ 638 639 while (i < k) { 640 nexti = jl[i]; /* next row to be added to k_th row */ 641 642 /* compute multiplier */ 643 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 644 645 /* uik = -inv(Di)*U_bar(i,k) */ 646 diag = ba + i * bs2; 647 u = ba + ili * bs2; 648 PetscCall(PetscArrayzero(uik, bs2)); 649 PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u); 650 651 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 652 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u); 653 PetscCall(PetscLogFlops(4.0 * bs * bs2)); 654 655 /* update -U(i,k) */ 656 PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2)); 657 658 /* add multiple of row i to k-th row ... */ 659 jmin = ili + 1; 660 jmax = bi[i + 1]; 661 if (jmin < jmax) { 662 for (j = jmin; j < jmax; j++) { 663 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 664 rtmp_ptr = rtmp + bj[j] * bs2; 665 u = ba + j * bs2; 666 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u); 667 } 668 PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin))); 669 670 /* ... add i to row list for next nonzero entry */ 671 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 672 j = bj[jmin]; 673 jl[i] = jl[j]; 674 jl[j] = i; /* update jl */ 675 } 676 i = nexti; 677 } 678 679 /* save nonzero entries in k-th row of U ... */ 680 681 /* invert diagonal block */ 682 diag = ba + k * bs2; 683 PetscCall(PetscArraycpy(diag, dk, bs2)); 684 685 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected)); 686 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 687 688 jmin = bi[k]; 689 jmax = bi[k + 1]; 690 if (jmin < jmax) { 691 for (j = jmin; j < jmax; j++) { 692 vj = bj[j]; /* block col. index of U */ 693 u = ba + j * bs2; 694 rtmp_ptr = rtmp + vj * bs2; 695 for (k1 = 0; k1 < bs2; k1++) { 696 *u++ = *rtmp_ptr; 697 *rtmp_ptr++ = 0.0; 698 } 699 } 700 701 /* ... add k to row list for first nonzero entry in k-th row */ 702 il[k] = jmin; 703 i = bj[jmin]; 704 jl[k] = jl[i]; 705 jl[i] = k; 706 } 707 } 708 709 PetscCall(PetscFree(rtmp)); 710 PetscCall(PetscFree2(il, jl)); 711 PetscCall(PetscFree3(dk, uik, work)); 712 PetscCall(PetscFree(pivots)); 713 if (a->permute) PetscCall(PetscFree(aa)); 714 715 PetscCall(ISRestoreIndices(perm, &perm_ptr)); 716 717 C->ops->solve = MatSolve_SeqSBAIJ_N_inplace; 718 C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace; 719 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_inplace; 720 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_inplace; 721 722 C->assembled = PETSC_TRUE; 723 C->preallocated = PETSC_TRUE; 724 725 PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */ 726 PetscFunctionReturn(0); 727 } 728 729 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info) { 730 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 731 PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j; 732 PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 733 PetscInt bs = A->rmap->bs, bs2 = a->bs2; 734 MatScalar *ba = b->a, *aa, *ap, *dk, *uik; 735 MatScalar *u, *diag, *rtmp, *rtmp_ptr; 736 MatScalar *work; 737 PetscInt *pivots; 738 PetscBool allowzeropivot, zeropivotdetected; 739 740 PetscFunctionBegin; 741 PetscCall(PetscCalloc1(bs2 * mbs, &rtmp)); 742 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 743 il[0] = 0; 744 for (i = 0; i < mbs; i++) jl[i] = mbs; 745 746 PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work)); 747 PetscCall(PetscMalloc1(bs, &pivots)); 748 allowzeropivot = PetscNot(A->erroriffailure); 749 750 ai = a->i; 751 aj = a->j; 752 aa = a->a; 753 754 /* for each row k */ 755 for (k = 0; k < mbs; k++) { 756 /*initialize k-th row with elements nonzero in row k of A */ 757 jmin = ai[k]; 758 jmax = ai[k + 1]; 759 ap = aa + jmin * bs2; 760 for (j = jmin; j < jmax; j++) { 761 vj = aj[j]; /* block col. index */ 762 rtmp_ptr = rtmp + vj * bs2; 763 for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++; 764 } 765 766 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 767 PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2)); 768 i = jl[k]; /* first row to be added to k_th row */ 769 770 while (i < k) { 771 nexti = jl[i]; /* next row to be added to k_th row */ 772 773 /* compute multiplier */ 774 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 775 776 /* uik = -inv(Di)*U_bar(i,k) */ 777 diag = ba + i * bs2; 778 u = ba + ili * bs2; 779 PetscCall(PetscArrayzero(uik, bs2)); 780 PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u); 781 782 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 783 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u); 784 PetscCall(PetscLogFlops(2.0 * bs * bs2)); 785 786 /* update -U(i,k) */ 787 PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2)); 788 789 /* add multiple of row i to k-th row ... */ 790 jmin = ili + 1; 791 jmax = bi[i + 1]; 792 if (jmin < jmax) { 793 for (j = jmin; j < jmax; j++) { 794 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 795 rtmp_ptr = rtmp + bj[j] * bs2; 796 u = ba + j * bs2; 797 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u); 798 } 799 PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin))); 800 801 /* ... add i to row list for next nonzero entry */ 802 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 803 j = bj[jmin]; 804 jl[i] = jl[j]; 805 jl[j] = i; /* update jl */ 806 } 807 i = nexti; 808 } 809 810 /* save nonzero entries in k-th row of U ... */ 811 812 /* invert diagonal block */ 813 diag = ba + k * bs2; 814 PetscCall(PetscArraycpy(diag, dk, bs2)); 815 816 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected)); 817 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 818 819 jmin = bi[k]; 820 jmax = bi[k + 1]; 821 if (jmin < jmax) { 822 for (j = jmin; j < jmax; j++) { 823 vj = bj[j]; /* block col. index of U */ 824 u = ba + j * bs2; 825 rtmp_ptr = rtmp + vj * bs2; 826 for (k1 = 0; k1 < bs2; k1++) { 827 *u++ = *rtmp_ptr; 828 *rtmp_ptr++ = 0.0; 829 } 830 } 831 832 /* ... add k to row list for first nonzero entry in k-th row */ 833 il[k] = jmin; 834 i = bj[jmin]; 835 jl[k] = jl[i]; 836 jl[i] = k; 837 } 838 } 839 840 PetscCall(PetscFree(rtmp)); 841 PetscCall(PetscFree2(il, jl)); 842 PetscCall(PetscFree3(dk, uik, work)); 843 PetscCall(PetscFree(pivots)); 844 845 C->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 846 C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 847 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 848 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 849 C->assembled = PETSC_TRUE; 850 C->preallocated = PETSC_TRUE; 851 852 PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */ 853 PetscFunctionReturn(0); 854 } 855 856 /* 857 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 858 Version for blocks 2 by 2. 859 */ 860 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C, Mat A, const MatFactorInfo *info) { 861 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 862 IS perm = b->row; 863 const PetscInt *ai, *aj, *perm_ptr; 864 PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j; 865 PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 866 MatScalar *ba = b->a, *aa, *ap; 867 MatScalar *u, *diag, *rtmp, *rtmp_ptr, dk[4], uik[4]; 868 PetscReal shift = info->shiftamount; 869 PetscBool allowzeropivot, zeropivotdetected; 870 871 PetscFunctionBegin; 872 allowzeropivot = PetscNot(A->erroriffailure); 873 874 /* initialization */ 875 /* il and jl record the first nonzero element in each row of the accessing 876 window U(0:k, k:mbs-1). 877 jl: list of rows to be added to uneliminated rows 878 i>= k: jl(i) is the first row to be added to row i 879 i< k: jl(i) is the row following row i in some list of rows 880 jl(i) = mbs indicates the end of a list 881 il(i): points to the first nonzero element in columns k,...,mbs-1 of 882 row i of U */ 883 PetscCall(PetscCalloc1(4 * mbs, &rtmp)); 884 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 885 il[0] = 0; 886 for (i = 0; i < mbs; i++) jl[i] = mbs; 887 888 PetscCall(ISGetIndices(perm, &perm_ptr)); 889 890 /* check permutation */ 891 if (!a->permute) { 892 ai = a->i; 893 aj = a->j; 894 aa = a->a; 895 } else { 896 ai = a->inew; 897 aj = a->jnew; 898 PetscCall(PetscMalloc1(4 * ai[mbs], &aa)); 899 PetscCall(PetscArraycpy(aa, a->a, 4 * ai[mbs])); 900 PetscCall(PetscMalloc1(ai[mbs], &a2anew)); 901 PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs])); 902 903 for (i = 0; i < mbs; i++) { 904 jmin = ai[i]; 905 jmax = ai[i + 1]; 906 for (j = jmin; j < jmax; j++) { 907 while (a2anew[j] != j) { 908 k = a2anew[j]; 909 a2anew[j] = a2anew[k]; 910 a2anew[k] = k; 911 for (k1 = 0; k1 < 4; k1++) { 912 dk[k1] = aa[k * 4 + k1]; 913 aa[k * 4 + k1] = aa[j * 4 + k1]; 914 aa[j * 4 + k1] = dk[k1]; 915 } 916 } 917 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 918 if (i > aj[j]) { 919 ap = aa + j * 4; /* ptr to the beginning of the block */ 920 dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 921 ap[1] = ap[2]; 922 ap[2] = dk[1]; 923 } 924 } 925 } 926 PetscCall(PetscFree(a2anew)); 927 } 928 929 /* for each row k */ 930 for (k = 0; k < mbs; k++) { 931 /*initialize k-th row with elements nonzero in row perm(k) of A */ 932 jmin = ai[perm_ptr[k]]; 933 jmax = ai[perm_ptr[k] + 1]; 934 ap = aa + jmin * 4; 935 for (j = jmin; j < jmax; j++) { 936 vj = perm_ptr[aj[j]]; /* block col. index */ 937 rtmp_ptr = rtmp + vj * 4; 938 for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++; 939 } 940 941 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 942 PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4)); 943 i = jl[k]; /* first row to be added to k_th row */ 944 945 while (i < k) { 946 nexti = jl[i]; /* next row to be added to k_th row */ 947 948 /* compute multiplier */ 949 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 950 951 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 952 diag = ba + i * 4; 953 u = ba + ili * 4; 954 uik[0] = -(diag[0] * u[0] + diag[2] * u[1]); 955 uik[1] = -(diag[1] * u[0] + diag[3] * u[1]); 956 uik[2] = -(diag[0] * u[2] + diag[2] * u[3]); 957 uik[3] = -(diag[1] * u[2] + diag[3] * u[3]); 958 959 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 960 dk[0] += uik[0] * u[0] + uik[1] * u[1]; 961 dk[1] += uik[2] * u[0] + uik[3] * u[1]; 962 dk[2] += uik[0] * u[2] + uik[1] * u[3]; 963 dk[3] += uik[2] * u[2] + uik[3] * u[3]; 964 965 PetscCall(PetscLogFlops(16.0 * 2.0)); 966 967 /* update -U(i,k): ba[ili] = uik */ 968 PetscCall(PetscArraycpy(ba + ili * 4, uik, 4)); 969 970 /* add multiple of row i to k-th row ... */ 971 jmin = ili + 1; 972 jmax = bi[i + 1]; 973 if (jmin < jmax) { 974 for (j = jmin; j < jmax; j++) { 975 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 976 rtmp_ptr = rtmp + bj[j] * 4; 977 u = ba + j * 4; 978 rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1]; 979 rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1]; 980 rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3]; 981 rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3]; 982 } 983 PetscCall(PetscLogFlops(16.0 * (jmax - jmin))); 984 985 /* ... add i to row list for next nonzero entry */ 986 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 987 j = bj[jmin]; 988 jl[i] = jl[j]; 989 jl[j] = i; /* update jl */ 990 } 991 i = nexti; 992 } 993 994 /* save nonzero entries in k-th row of U ... */ 995 996 /* invert diagonal block */ 997 diag = ba + k * 4; 998 PetscCall(PetscArraycpy(diag, dk, 4)); 999 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 1000 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1001 1002 jmin = bi[k]; 1003 jmax = bi[k + 1]; 1004 if (jmin < jmax) { 1005 for (j = jmin; j < jmax; j++) { 1006 vj = bj[j]; /* block col. index of U */ 1007 u = ba + j * 4; 1008 rtmp_ptr = rtmp + vj * 4; 1009 for (k1 = 0; k1 < 4; k1++) { 1010 *u++ = *rtmp_ptr; 1011 *rtmp_ptr++ = 0.0; 1012 } 1013 } 1014 1015 /* ... add k to row list for first nonzero entry in k-th row */ 1016 il[k] = jmin; 1017 i = bj[jmin]; 1018 jl[k] = jl[i]; 1019 jl[i] = k; 1020 } 1021 } 1022 1023 PetscCall(PetscFree(rtmp)); 1024 PetscCall(PetscFree2(il, jl)); 1025 if (a->permute) PetscCall(PetscFree(aa)); 1026 PetscCall(ISRestoreIndices(perm, &perm_ptr)); 1027 1028 C->ops->solve = MatSolve_SeqSBAIJ_2_inplace; 1029 C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace; 1030 C->assembled = PETSC_TRUE; 1031 C->preallocated = PETSC_TRUE; 1032 1033 PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */ 1034 PetscFunctionReturn(0); 1035 } 1036 1037 /* 1038 Version for when blocks are 2 by 2 Using natural ordering 1039 */ 1040 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info) { 1041 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 1042 PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j; 1043 PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 1044 MatScalar *ba = b->a, *aa, *ap, dk[8], uik[8]; 1045 MatScalar *u, *diag, *rtmp, *rtmp_ptr; 1046 PetscReal shift = info->shiftamount; 1047 PetscBool allowzeropivot, zeropivotdetected; 1048 1049 PetscFunctionBegin; 1050 allowzeropivot = PetscNot(A->erroriffailure); 1051 1052 /* initialization */ 1053 /* il and jl record the first nonzero element in each row of the accessing 1054 window U(0:k, k:mbs-1). 1055 jl: list of rows to be added to uneliminated rows 1056 i>= k: jl(i) is the first row to be added to row i 1057 i< k: jl(i) is the row following row i in some list of rows 1058 jl(i) = mbs indicates the end of a list 1059 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1060 row i of U */ 1061 PetscCall(PetscCalloc1(4 * mbs, &rtmp)); 1062 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 1063 il[0] = 0; 1064 for (i = 0; i < mbs; i++) jl[i] = mbs; 1065 1066 ai = a->i; 1067 aj = a->j; 1068 aa = a->a; 1069 1070 /* for each row k */ 1071 for (k = 0; k < mbs; k++) { 1072 /*initialize k-th row with elements nonzero in row k of A */ 1073 jmin = ai[k]; 1074 jmax = ai[k + 1]; 1075 ap = aa + jmin * 4; 1076 for (j = jmin; j < jmax; j++) { 1077 vj = aj[j]; /* block col. index */ 1078 rtmp_ptr = rtmp + vj * 4; 1079 for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++; 1080 } 1081 1082 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1083 PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4)); 1084 i = jl[k]; /* first row to be added to k_th row */ 1085 1086 while (i < k) { 1087 nexti = jl[i]; /* next row to be added to k_th row */ 1088 1089 /* compute multiplier */ 1090 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1091 1092 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 1093 diag = ba + i * 4; 1094 u = ba + ili * 4; 1095 uik[0] = -(diag[0] * u[0] + diag[2] * u[1]); 1096 uik[1] = -(diag[1] * u[0] + diag[3] * u[1]); 1097 uik[2] = -(diag[0] * u[2] + diag[2] * u[3]); 1098 uik[3] = -(diag[1] * u[2] + diag[3] * u[3]); 1099 1100 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 1101 dk[0] += uik[0] * u[0] + uik[1] * u[1]; 1102 dk[1] += uik[2] * u[0] + uik[3] * u[1]; 1103 dk[2] += uik[0] * u[2] + uik[1] * u[3]; 1104 dk[3] += uik[2] * u[2] + uik[3] * u[3]; 1105 1106 PetscCall(PetscLogFlops(16.0 * 2.0)); 1107 1108 /* update -U(i,k): ba[ili] = uik */ 1109 PetscCall(PetscArraycpy(ba + ili * 4, uik, 4)); 1110 1111 /* add multiple of row i to k-th row ... */ 1112 jmin = ili + 1; 1113 jmax = bi[i + 1]; 1114 if (jmin < jmax) { 1115 for (j = jmin; j < jmax; j++) { 1116 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 1117 rtmp_ptr = rtmp + bj[j] * 4; 1118 u = ba + j * 4; 1119 rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1]; 1120 rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1]; 1121 rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3]; 1122 rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3]; 1123 } 1124 PetscCall(PetscLogFlops(16.0 * (jmax - jmin))); 1125 1126 /* ... add i to row list for next nonzero entry */ 1127 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1128 j = bj[jmin]; 1129 jl[i] = jl[j]; 1130 jl[j] = i; /* update jl */ 1131 } 1132 i = nexti; 1133 } 1134 1135 /* save nonzero entries in k-th row of U ... */ 1136 1137 /* invert diagonal block */ 1138 diag = ba + k * 4; 1139 PetscCall(PetscArraycpy(diag, dk, 4)); 1140 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 1141 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1142 1143 jmin = bi[k]; 1144 jmax = bi[k + 1]; 1145 if (jmin < jmax) { 1146 for (j = jmin; j < jmax; j++) { 1147 vj = bj[j]; /* block col. index of U */ 1148 u = ba + j * 4; 1149 rtmp_ptr = rtmp + vj * 4; 1150 for (k1 = 0; k1 < 4; k1++) { 1151 *u++ = *rtmp_ptr; 1152 *rtmp_ptr++ = 0.0; 1153 } 1154 } 1155 1156 /* ... add k to row list for first nonzero entry in k-th row */ 1157 il[k] = jmin; 1158 i = bj[jmin]; 1159 jl[k] = jl[i]; 1160 jl[i] = k; 1161 } 1162 } 1163 1164 PetscCall(PetscFree(rtmp)); 1165 PetscCall(PetscFree2(il, jl)); 1166 1167 C->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1168 C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1169 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1170 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1171 C->assembled = PETSC_TRUE; 1172 C->preallocated = PETSC_TRUE; 1173 1174 PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */ 1175 PetscFunctionReturn(0); 1176 } 1177 1178 /* 1179 Numeric U^T*D*U factorization for SBAIJ format. 1180 Version for blocks are 1 by 1. 1181 */ 1182 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info) { 1183 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 1184 IS ip = b->row; 1185 const PetscInt *ai, *aj, *rip; 1186 PetscInt *a2anew, i, j, mbs = a->mbs, *bi = b->i, *bj = b->j, *bcol; 1187 PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 1188 MatScalar *rtmp, *ba = b->a, *bval, *aa, dk, uikdi; 1189 PetscReal rs; 1190 FactorShiftCtx sctx; 1191 1192 PetscFunctionBegin; 1193 /* MatPivotSetUp(): initialize shift context sctx */ 1194 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1195 1196 PetscCall(ISGetIndices(ip, &rip)); 1197 if (!a->permute) { 1198 ai = a->i; 1199 aj = a->j; 1200 aa = a->a; 1201 } else { 1202 ai = a->inew; 1203 aj = a->jnew; 1204 nz = ai[mbs]; 1205 PetscCall(PetscMalloc1(nz, &aa)); 1206 a2anew = a->a2anew; 1207 bval = a->a; 1208 for (j = 0; j < nz; j++) aa[a2anew[j]] = *(bval++); 1209 } 1210 1211 /* initialization */ 1212 /* il and jl record the first nonzero element in each row of the accessing 1213 window U(0:k, k:mbs-1). 1214 jl: list of rows to be added to uneliminated rows 1215 i>= k: jl(i) is the first row to be added to row i 1216 i< k: jl(i) is the row following row i in some list of rows 1217 jl(i) = mbs indicates the end of a list 1218 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1219 row i of U */ 1220 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 1221 1222 do { 1223 sctx.newshift = PETSC_FALSE; 1224 il[0] = 0; 1225 for (i = 0; i < mbs; i++) { 1226 rtmp[i] = 0.0; 1227 jl[i] = mbs; 1228 } 1229 1230 for (k = 0; k < mbs; k++) { 1231 /*initialize k-th row by the perm[k]-th row of A */ 1232 jmin = ai[rip[k]]; 1233 jmax = ai[rip[k] + 1]; 1234 bval = ba + bi[k]; 1235 for (j = jmin; j < jmax; j++) { 1236 col = rip[aj[j]]; 1237 rtmp[col] = aa[j]; 1238 *bval++ = 0.0; /* for in-place factorization */ 1239 } 1240 1241 /* shift the diagonal of the matrix */ 1242 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1243 1244 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1245 dk = rtmp[k]; 1246 i = jl[k]; /* first row to be added to k_th row */ 1247 1248 while (i < k) { 1249 nexti = jl[i]; /* next row to be added to k_th row */ 1250 1251 /* compute multiplier, update diag(k) and U(i,k) */ 1252 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1253 uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 1254 dk += uikdi * ba[ili]; 1255 ba[ili] = uikdi; /* -U(i,k) */ 1256 1257 /* add multiple of row i to k-th row */ 1258 jmin = ili + 1; 1259 jmax = bi[i + 1]; 1260 if (jmin < jmax) { 1261 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 1262 PetscCall(PetscLogFlops(2.0 * (jmax - jmin))); 1263 1264 /* update il and jl for row i */ 1265 il[i] = jmin; 1266 j = bj[jmin]; 1267 jl[i] = jl[j]; 1268 jl[j] = i; 1269 } 1270 i = nexti; 1271 } 1272 1273 /* shift the diagonals when zero pivot is detected */ 1274 /* compute rs=sum of abs(off-diagonal) */ 1275 rs = 0.0; 1276 jmin = bi[k] + 1; 1277 nz = bi[k + 1] - jmin; 1278 if (nz) { 1279 bcol = bj + jmin; 1280 while (nz--) { 1281 rs += PetscAbsScalar(rtmp[*bcol]); 1282 bcol++; 1283 } 1284 } 1285 1286 sctx.rs = rs; 1287 sctx.pv = dk; 1288 PetscCall(MatPivotCheck(C, A, info, &sctx, k)); 1289 if (sctx.newshift) break; /* sctx.shift_amount is updated */ 1290 dk = sctx.pv; 1291 1292 /* copy data into U(k,:) */ 1293 ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 1294 jmin = bi[k] + 1; 1295 jmax = bi[k + 1]; 1296 if (jmin < jmax) { 1297 for (j = jmin; j < jmax; j++) { 1298 col = bj[j]; 1299 ba[j] = rtmp[col]; 1300 rtmp[col] = 0.0; 1301 } 1302 /* add the k-th row into il and jl */ 1303 il[k] = jmin; 1304 i = bj[jmin]; 1305 jl[k] = jl[i]; 1306 jl[i] = k; 1307 } 1308 } 1309 } while (sctx.newshift); 1310 PetscCall(PetscFree3(rtmp, il, jl)); 1311 if (a->permute) PetscCall(PetscFree(aa)); 1312 1313 PetscCall(ISRestoreIndices(ip, &rip)); 1314 1315 C->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 1316 C->ops->solves = MatSolves_SeqSBAIJ_1_inplace; 1317 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 1318 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 1319 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 1320 C->assembled = PETSC_TRUE; 1321 C->preallocated = PETSC_TRUE; 1322 1323 PetscCall(PetscLogFlops(C->rmap->N)); 1324 if (sctx.nshift) { 1325 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1326 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1327 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1328 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1329 } 1330 } 1331 PetscFunctionReturn(0); 1332 } 1333 1334 /* 1335 Version for when blocks are 1 by 1 Using natural ordering under new datastructure 1336 Modified from MatCholeskyFactorNumeric_SeqAIJ() 1337 */ 1338 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info) { 1339 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1340 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)B->data; 1341 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp; 1342 PetscInt *ai = a->i, *aj = a->j, *ajtmp; 1343 PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz; 1344 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 1345 FactorShiftCtx sctx; 1346 PetscReal rs; 1347 MatScalar d, *v; 1348 1349 PetscFunctionBegin; 1350 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r)); 1351 1352 /* MatPivotSetUp(): initialize shift context sctx */ 1353 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1354 1355 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 1356 sctx.shift_top = info->zeropivot; 1357 1358 PetscCall(PetscArrayzero(rtmp, mbs)); 1359 1360 for (i = 0; i < mbs; i++) { 1361 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 1362 d = (aa)[a->diag[i]]; 1363 rtmp[i] += -PetscRealPart(d); /* diagonal entry */ 1364 ajtmp = aj + ai[i] + 1; /* exclude diagonal */ 1365 v = aa + ai[i] + 1; 1366 nz = ai[i + 1] - ai[i] - 1; 1367 for (j = 0; j < nz; j++) { 1368 rtmp[i] += PetscAbsScalar(v[j]); 1369 rtmp[ajtmp[j]] += PetscAbsScalar(v[j]); 1370 } 1371 if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]); 1372 } 1373 sctx.shift_top *= 1.1; 1374 sctx.nshift_max = 5; 1375 sctx.shift_lo = 0.; 1376 sctx.shift_hi = 1.; 1377 } 1378 1379 /* allocate working arrays 1380 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 1381 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 1382 */ 1383 do { 1384 sctx.newshift = PETSC_FALSE; 1385 1386 for (i = 0; i < mbs; i++) c2r[i] = mbs; 1387 if (mbs) il[0] = 0; 1388 1389 for (k = 0; k < mbs; k++) { 1390 /* zero rtmp */ 1391 nz = bi[k + 1] - bi[k]; 1392 bjtmp = bj + bi[k]; 1393 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 1394 1395 /* load in initial unfactored row */ 1396 bval = ba + bi[k]; 1397 jmin = ai[k]; 1398 jmax = ai[k + 1]; 1399 for (j = jmin; j < jmax; j++) { 1400 col = aj[j]; 1401 rtmp[col] = aa[j]; 1402 *bval++ = 0.0; /* for in-place factorization */ 1403 } 1404 /* shift the diagonal of the matrix: ZeropivotApply() */ 1405 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 1406 1407 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1408 dk = rtmp[k]; 1409 i = c2r[k]; /* first row to be added to k_th row */ 1410 1411 while (i < k) { 1412 nexti = c2r[i]; /* next row to be added to k_th row */ 1413 1414 /* compute multiplier, update diag(k) and U(i,k) */ 1415 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1416 uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */ 1417 dk += uikdi * ba[ili]; /* update diag[k] */ 1418 ba[ili] = uikdi; /* -U(i,k) */ 1419 1420 /* add multiple of row i to k-th row */ 1421 jmin = ili + 1; 1422 jmax = bi[i + 1]; 1423 if (jmin < jmax) { 1424 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 1425 /* update il and c2r for row i */ 1426 il[i] = jmin; 1427 j = bj[jmin]; 1428 c2r[i] = c2r[j]; 1429 c2r[j] = i; 1430 } 1431 i = nexti; 1432 } 1433 1434 /* copy data into U(k,:) */ 1435 rs = 0.0; 1436 jmin = bi[k]; 1437 jmax = bi[k + 1] - 1; 1438 if (jmin < jmax) { 1439 for (j = jmin; j < jmax; j++) { 1440 col = bj[j]; 1441 ba[j] = rtmp[col]; 1442 rs += PetscAbsScalar(ba[j]); 1443 } 1444 /* add the k-th row into il and c2r */ 1445 il[k] = jmin; 1446 i = bj[jmin]; 1447 c2r[k] = c2r[i]; 1448 c2r[i] = k; 1449 } 1450 1451 sctx.rs = rs; 1452 sctx.pv = dk; 1453 PetscCall(MatPivotCheck(B, A, info, &sctx, k)); 1454 if (sctx.newshift) break; 1455 dk = sctx.pv; 1456 1457 ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */ 1458 } 1459 } while (sctx.newshift); 1460 1461 PetscCall(PetscFree3(rtmp, il, c2r)); 1462 1463 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1464 B->ops->solves = MatSolves_SeqSBAIJ_1; 1465 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1466 B->ops->matsolve = MatMatSolve_SeqSBAIJ_1_NaturalOrdering; 1467 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1468 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1469 1470 B->assembled = PETSC_TRUE; 1471 B->preallocated = PETSC_TRUE; 1472 1473 PetscCall(PetscLogFlops(B->rmap->n)); 1474 1475 /* MatPivotView() */ 1476 if (sctx.nshift) { 1477 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1478 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 1479 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1480 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1481 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 1482 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 1483 } 1484 } 1485 PetscFunctionReturn(0); 1486 } 1487 1488 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info) { 1489 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 1490 PetscInt i, j, mbs = a->mbs; 1491 PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 1492 PetscInt k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz; 1493 MatScalar *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval; 1494 PetscReal rs; 1495 FactorShiftCtx sctx; 1496 1497 PetscFunctionBegin; 1498 /* MatPivotSetUp(): initialize shift context sctx */ 1499 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1500 1501 /* initialization */ 1502 /* il and jl record the first nonzero element in each row of the accessing 1503 window U(0:k, k:mbs-1). 1504 jl: list of rows to be added to uneliminated rows 1505 i>= k: jl(i) is the first row to be added to row i 1506 i< k: jl(i) is the row following row i in some list of rows 1507 jl(i) = mbs indicates the end of a list 1508 il(i): points to the first nonzero element in U(i,k:mbs-1) 1509 */ 1510 PetscCall(PetscMalloc1(mbs, &rtmp)); 1511 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 1512 1513 do { 1514 sctx.newshift = PETSC_FALSE; 1515 il[0] = 0; 1516 for (i = 0; i < mbs; i++) { 1517 rtmp[i] = 0.0; 1518 jl[i] = mbs; 1519 } 1520 1521 for (k = 0; k < mbs; k++) { 1522 /*initialize k-th row with elements nonzero in row perm(k) of A */ 1523 nz = ai[k + 1] - ai[k]; 1524 acol = aj + ai[k]; 1525 aval = aa + ai[k]; 1526 bval = ba + bi[k]; 1527 while (nz--) { 1528 rtmp[*acol++] = *aval++; 1529 *bval++ = 0.0; /* for in-place factorization */ 1530 } 1531 1532 /* shift the diagonal of the matrix */ 1533 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1534 1535 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1536 dk = rtmp[k]; 1537 i = jl[k]; /* first row to be added to k_th row */ 1538 1539 while (i < k) { 1540 nexti = jl[i]; /* next row to be added to k_th row */ 1541 /* compute multiplier, update D(k) and U(i,k) */ 1542 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1543 uikdi = -ba[ili] * ba[bi[i]]; 1544 dk += uikdi * ba[ili]; 1545 ba[ili] = uikdi; /* -U(i,k) */ 1546 1547 /* add multiple of row i to k-th row ... */ 1548 jmin = ili + 1; 1549 nz = bi[i + 1] - jmin; 1550 if (nz > 0) { 1551 bcol = bj + jmin; 1552 bval = ba + jmin; 1553 PetscCall(PetscLogFlops(2.0 * nz)); 1554 while (nz--) rtmp[*bcol++] += uikdi * (*bval++); 1555 1556 /* update il and jl for i-th row */ 1557 il[i] = jmin; 1558 j = bj[jmin]; 1559 jl[i] = jl[j]; 1560 jl[j] = i; 1561 } 1562 i = nexti; 1563 } 1564 1565 /* shift the diagonals when zero pivot is detected */ 1566 /* compute rs=sum of abs(off-diagonal) */ 1567 rs = 0.0; 1568 jmin = bi[k] + 1; 1569 nz = bi[k + 1] - jmin; 1570 if (nz) { 1571 bcol = bj + jmin; 1572 while (nz--) { 1573 rs += PetscAbsScalar(rtmp[*bcol]); 1574 bcol++; 1575 } 1576 } 1577 1578 sctx.rs = rs; 1579 sctx.pv = dk; 1580 PetscCall(MatPivotCheck(C, A, info, &sctx, k)); 1581 if (sctx.newshift) break; /* sctx.shift_amount is updated */ 1582 dk = sctx.pv; 1583 1584 /* copy data into U(k,:) */ 1585 ba[bi[k]] = 1.0 / dk; 1586 jmin = bi[k] + 1; 1587 nz = bi[k + 1] - jmin; 1588 if (nz) { 1589 bcol = bj + jmin; 1590 bval = ba + jmin; 1591 while (nz--) { 1592 *bval++ = rtmp[*bcol]; 1593 rtmp[*bcol++] = 0.0; 1594 } 1595 /* add k-th row into il and jl */ 1596 il[k] = jmin; 1597 i = bj[jmin]; 1598 jl[k] = jl[i]; 1599 jl[i] = k; 1600 } 1601 } /* end of for (k = 0; k<mbs; k++) */ 1602 } while (sctx.newshift); 1603 PetscCall(PetscFree(rtmp)); 1604 PetscCall(PetscFree2(il, jl)); 1605 1606 C->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1607 C->ops->solves = MatSolves_SeqSBAIJ_1_inplace; 1608 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1609 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1610 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1611 1612 C->assembled = PETSC_TRUE; 1613 C->preallocated = PETSC_TRUE; 1614 1615 PetscCall(PetscLogFlops(C->rmap->N)); 1616 if (sctx.nshift) { 1617 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1618 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1619 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1620 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1621 } 1622 } 1623 PetscFunctionReturn(0); 1624 } 1625 1626 PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A, IS perm, const MatFactorInfo *info) { 1627 Mat C; 1628 1629 PetscFunctionBegin; 1630 PetscCall(MatGetFactor(A, "petsc", MAT_FACTOR_CHOLESKY, &C)); 1631 PetscCall(MatCholeskyFactorSymbolic(C, A, perm, info)); 1632 PetscCall(MatCholeskyFactorNumeric(C, A, info)); 1633 1634 A->ops->solve = C->ops->solve; 1635 A->ops->solvetranspose = C->ops->solvetranspose; 1636 1637 PetscCall(MatHeaderMerge(A, &C)); 1638 PetscFunctionReturn(0); 1639 } 1640