1 2 #include <../src/mat/impls/baij/seq/baij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petsc/private/kernels/blockinvert.h> 5 #include <petscis.h> 6 7 PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 8 { 9 Mat_SeqSBAIJ *fact = (Mat_SeqSBAIJ *)F->data; 10 MatScalar *dd = fact->a; 11 PetscInt mbs = fact->mbs, bs = F->rmap->bs, i, nneg_tmp, npos_tmp, *fi = fact->diag; 12 13 PetscFunctionBegin; 14 PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for bs: %" PetscInt_FMT " >1 yet", bs); 15 16 nneg_tmp = 0; 17 npos_tmp = 0; 18 for (i = 0; i < mbs; i++) { 19 if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++; 20 else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++; 21 fi++; 22 } 23 if (nneg) *nneg = nneg_tmp; 24 if (npos) *npos = npos_tmp; 25 if (nzero) *nzero = mbs - nneg_tmp - npos_tmp; 26 PetscFunctionReturn(PETSC_SUCCESS); 27 } 28 29 /* 30 Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 31 Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad. 32 */ 33 static PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F, Mat A, IS perm, const MatFactorInfo *info) 34 { 35 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b; 36 const PetscInt *rip, *ai, *aj; 37 PetscInt i, mbs = a->mbs, *jutmp, bs = A->rmap->bs, bs2 = a->bs2; 38 PetscInt m, reallocs = 0, prow; 39 PetscInt *jl, *q, jmin, jmax, juidx, nzk, qm, *iu, *ju, k, j, vj, umax, maxadd; 40 PetscReal f = info->fill; 41 PetscBool perm_identity; 42 43 PetscFunctionBegin; 44 /* check whether perm is the identity mapping */ 45 PetscCall(ISIdentity(perm, &perm_identity)); 46 PetscCall(ISGetIndices(perm, &rip)); 47 48 if (perm_identity) { /* without permutation */ 49 a->permute = PETSC_FALSE; 50 51 ai = a->i; 52 aj = a->j; 53 } else { /* non-trivial permutation */ 54 a->permute = PETSC_TRUE; 55 56 PetscCall(MatReorderingSeqSBAIJ(A, perm)); 57 58 ai = a->inew; 59 aj = a->jnew; 60 } 61 62 /* initialization */ 63 PetscCall(PetscMalloc1(mbs + 1, &iu)); 64 umax = (PetscInt)(f * ai[mbs] + 1); 65 umax += mbs + 1; 66 PetscCall(PetscMalloc1(umax, &ju)); 67 iu[0] = mbs + 1; 68 juidx = mbs + 1; /* index for ju */ 69 /* jl linked list for pivot row -- linked list for col index */ 70 PetscCall(PetscMalloc2(mbs, &jl, mbs, &q)); 71 for (i = 0; i < mbs; i++) { 72 jl[i] = mbs; 73 q[i] = 0; 74 } 75 76 /* for each row k */ 77 for (k = 0; k < mbs; k++) { 78 for (i = 0; i < mbs; i++) q[i] = 0; /* to be removed! */ 79 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 80 q[k] = mbs; 81 /* initialize nonzero structure of k-th row to row rip[k] of A */ 82 jmin = ai[rip[k]] + 1; /* exclude diag[k] */ 83 jmax = ai[rip[k] + 1]; 84 for (j = jmin; j < jmax; j++) { 85 vj = rip[aj[j]]; /* col. value */ 86 if (vj > k) { 87 qm = k; 88 do { 89 m = qm; 90 qm = q[m]; 91 } while (qm < vj); 92 PetscCheck(qm != vj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Duplicate entry in A"); 93 nzk++; 94 q[m] = vj; 95 q[vj] = qm; 96 } /* if (vj > k) */ 97 } /* for (j=jmin; j<jmax; j++) */ 98 99 /* modify nonzero structure of k-th row by computing fill-in 100 for each row i to be merged in */ 101 prow = k; 102 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 103 104 while (prow < k) { 105 /* merge row prow into k-th row */ 106 jmin = iu[prow] + 1; 107 jmax = iu[prow + 1]; 108 qm = k; 109 for (j = jmin; j < jmax; j++) { 110 vj = ju[j]; 111 do { 112 m = qm; 113 qm = q[m]; 114 } while (qm < vj); 115 if (qm != vj) { 116 nzk++; 117 q[m] = vj; 118 q[vj] = qm; 119 qm = vj; 120 } 121 } 122 prow = jl[prow]; /* next pivot row */ 123 } 124 125 /* add k to row list for first nonzero element in k-th row */ 126 if (nzk > 0) { 127 i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 128 jl[k] = jl[i]; 129 jl[i] = k; 130 } 131 iu[k + 1] = iu[k] + nzk; 132 133 /* allocate more space to ju if needed */ 134 if (iu[k + 1] > umax) { 135 /* estimate how much additional space we will need */ 136 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 137 /* just double the memory each time */ 138 maxadd = umax; 139 if (maxadd < nzk) maxadd = (mbs - k) * (nzk + 1) / 2; 140 umax += maxadd; 141 142 /* allocate a longer ju */ 143 PetscCall(PetscMalloc1(umax, &jutmp)); 144 PetscCall(PetscArraycpy(jutmp, ju, iu[k])); 145 PetscCall(PetscFree(ju)); 146 ju = jutmp; 147 reallocs++; /* count how many times we realloc */ 148 } 149 150 /* save nonzero structure of k-th row in ju */ 151 i = k; 152 while (nzk--) { 153 i = q[i]; 154 ju[juidx++] = i; 155 } 156 } 157 158 #if defined(PETSC_USE_INFO) 159 if (ai[mbs] != 0) { 160 PetscReal af = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]); 161 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 162 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 163 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 164 PetscCall(PetscInfo(A, "for best performance.\n")); 165 } else { 166 PetscCall(PetscInfo(A, "Empty matrix\n")); 167 } 168 #endif 169 170 PetscCall(ISRestoreIndices(perm, &rip)); 171 PetscCall(PetscFree2(jl, q)); 172 173 /* put together the new matrix */ 174 PetscCall(MatSeqSBAIJSetPreallocation(F, bs, MAT_SKIP_ALLOCATION, NULL)); 175 176 b = (Mat_SeqSBAIJ *)(F)->data; 177 b->singlemalloc = PETSC_FALSE; 178 b->free_a = PETSC_TRUE; 179 b->free_ij = PETSC_TRUE; 180 181 PetscCall(PetscMalloc1((iu[mbs] + 1) * bs2, &b->a)); 182 b->j = ju; 183 b->i = iu; 184 b->diag = NULL; 185 b->ilen = NULL; 186 b->imax = NULL; 187 b->row = perm; 188 189 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 190 191 PetscCall(PetscObjectReference((PetscObject)perm)); 192 193 b->icol = perm; 194 PetscCall(PetscObjectReference((PetscObject)perm)); 195 PetscCall(PetscMalloc1(bs * mbs + bs, &b->solve_work)); 196 /* In b structure: Free imax, ilen, old a, old j. 197 Allocate idnew, solve_work, new a, new j */ 198 b->maxnz = b->nz = iu[mbs]; 199 200 (F)->info.factor_mallocs = reallocs; 201 (F)->info.fill_ratio_given = f; 202 if (ai[mbs] != 0) { 203 (F)->info.fill_ratio_needed = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]); 204 } else { 205 (F)->info.fill_ratio_needed = 0.0; 206 } 207 PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(F, perm_identity)); 208 PetscFunctionReturn(PETSC_SUCCESS); 209 } 210 /* 211 Symbolic U^T*D*U factorization for SBAIJ format. 212 See MatICCFactorSymbolic_SeqAIJ() for description of its data structure. 213 */ 214 #include <petscbt.h> 215 #include <../src/mat/utils/freespace.h> 216 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 217 { 218 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 219 Mat_SeqSBAIJ *b; 220 PetscBool perm_identity, missing; 221 PetscReal fill = info->fill; 222 const PetscInt *rip, *ai = a->i, *aj = a->j; 223 PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow; 224 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 225 PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr, *udiag; 226 PetscFreeSpaceList free_space = NULL, current_space = NULL; 227 PetscBT lnkbt; 228 229 PetscFunctionBegin; 230 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 231 PetscCall(MatMissingDiagonal(A, &missing, &i)); 232 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 233 if (bs > 1) { 234 PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact, A, perm, info)); 235 PetscFunctionReturn(PETSC_SUCCESS); 236 } 237 238 /* check whether perm is the identity mapping */ 239 PetscCall(ISIdentity(perm, &perm_identity)); 240 PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format"); 241 a->permute = PETSC_FALSE; 242 PetscCall(ISGetIndices(perm, &rip)); 243 244 /* initialization */ 245 PetscCall(PetscMalloc1(mbs + 1, &ui)); 246 PetscCall(PetscMalloc1(mbs + 1, &udiag)); 247 ui[0] = 0; 248 249 /* jl: linked list for storing indices of the pivot rows 250 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 251 PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols)); 252 for (i = 0; i < mbs; i++) { 253 jl[i] = mbs; 254 il[i] = 0; 255 } 256 257 /* create and initialize a linked list for storing column indices of the active row k */ 258 nlnk = mbs + 1; 259 PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt)); 260 261 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 262 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space)); 263 current_space = free_space; 264 265 for (k = 0; k < mbs; k++) { /* for each active row k */ 266 /* initialize lnk by the column indices of row rip[k] of A */ 267 nzk = 0; 268 ncols = ai[k + 1] - ai[k]; 269 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row %" PetscInt_FMT " in matrix ", k); 270 for (j = 0; j < ncols; j++) { 271 i = *(aj + ai[k] + j); 272 cols[j] = i; 273 } 274 PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt)); 275 nzk += nlnk; 276 277 /* update lnk by computing fill-in for each pivot row to be merged in */ 278 prow = jl[k]; /* 1st pivot row */ 279 280 while (prow < k) { 281 nextprow = jl[prow]; 282 /* merge prow into k-th row */ 283 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 284 jmax = ui[prow + 1]; 285 ncols = jmax - jmin; 286 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 287 PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt)); 288 nzk += nlnk; 289 290 /* update il and jl for prow */ 291 if (jmin < jmax) { 292 il[prow] = jmin; 293 j = *uj_ptr; 294 jl[prow] = jl[j]; 295 jl[j] = prow; 296 } 297 prow = nextprow; 298 } 299 300 /* if free space is not available, make more free space */ 301 if (current_space->local_remaining < nzk) { 302 i = mbs - k + 1; /* num of unfactored rows */ 303 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 304 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 305 reallocs++; 306 } 307 308 /* copy data into free space, then initialize lnk */ 309 PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt)); 310 311 /* add the k-th row into il and jl */ 312 if (nzk > 1) { 313 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 314 jl[k] = jl[i]; 315 jl[i] = k; 316 il[k] = ui[k] + 1; 317 } 318 ui_ptr[k] = current_space->array; 319 320 current_space->array += nzk; 321 current_space->local_used += nzk; 322 current_space->local_remaining -= nzk; 323 324 ui[k + 1] = ui[k] + nzk; 325 } 326 327 PetscCall(ISRestoreIndices(perm, &rip)); 328 PetscCall(PetscFree4(ui_ptr, il, jl, cols)); 329 330 /* destroy list of free space and other temporary array(s) */ 331 PetscCall(PetscMalloc1(ui[mbs] + 1, &uj)); 332 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, mbs, ui, udiag)); /* store matrix factor */ 333 PetscCall(PetscLLDestroy(lnk, lnkbt)); 334 335 /* put together the new matrix in MATSEQSBAIJ format */ 336 PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL)); 337 338 b = (Mat_SeqSBAIJ *)fact->data; 339 b->singlemalloc = PETSC_FALSE; 340 b->free_a = PETSC_TRUE; 341 b->free_ij = PETSC_TRUE; 342 343 PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a)); 344 345 b->j = uj; 346 b->i = ui; 347 b->diag = udiag; 348 b->free_diag = PETSC_TRUE; 349 b->ilen = NULL; 350 b->imax = NULL; 351 b->row = perm; 352 b->icol = perm; 353 354 PetscCall(PetscObjectReference((PetscObject)perm)); 355 PetscCall(PetscObjectReference((PetscObject)perm)); 356 357 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 358 359 PetscCall(PetscMalloc1(mbs + 1, &b->solve_work)); 360 361 b->maxnz = b->nz = ui[mbs]; 362 363 fact->info.factor_mallocs = reallocs; 364 fact->info.fill_ratio_given = fill; 365 if (ai[mbs] != 0) { 366 fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs]; 367 } else { 368 fact->info.fill_ratio_needed = 0.0; 369 } 370 #if defined(PETSC_USE_INFO) 371 if (ai[mbs] != 0) { 372 PetscReal af = fact->info.fill_ratio_needed; 373 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 374 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 375 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 376 } else { 377 PetscCall(PetscInfo(A, "Empty matrix\n")); 378 } 379 #endif 380 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 381 PetscFunctionReturn(PETSC_SUCCESS); 382 } 383 384 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 385 { 386 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 387 Mat_SeqSBAIJ *b; 388 PetscBool perm_identity, missing; 389 PetscReal fill = info->fill; 390 const PetscInt *rip, *ai, *aj; 391 PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow, d; 392 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 393 PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr; 394 PetscFreeSpaceList free_space = NULL, current_space = NULL; 395 PetscBT lnkbt; 396 397 PetscFunctionBegin; 398 PetscCall(MatMissingDiagonal(A, &missing, &d)); 399 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 400 401 /* 402 This code originally uses Modified Sparse Row (MSR) storage 403 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice! 404 Then it is rewritten so the factor B takes seqsbaij format. However the associated 405 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity, 406 thus the original code in MSR format is still used for these cases. 407 The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever 408 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 409 */ 410 if (bs > 1) { 411 PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact, A, perm, info)); 412 PetscFunctionReturn(PETSC_SUCCESS); 413 } 414 415 /* check whether perm is the identity mapping */ 416 PetscCall(ISIdentity(perm, &perm_identity)); 417 PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format"); 418 a->permute = PETSC_FALSE; 419 ai = a->i; 420 aj = a->j; 421 PetscCall(ISGetIndices(perm, &rip)); 422 423 /* initialization */ 424 PetscCall(PetscMalloc1(mbs + 1, &ui)); 425 ui[0] = 0; 426 427 /* jl: linked list for storing indices of the pivot rows 428 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 429 PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols)); 430 for (i = 0; i < mbs; i++) { 431 jl[i] = mbs; 432 il[i] = 0; 433 } 434 435 /* create and initialize a linked list for storing column indices of the active row k */ 436 nlnk = mbs + 1; 437 PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt)); 438 439 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 440 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space)); 441 current_space = free_space; 442 443 for (k = 0; k < mbs; k++) { /* for each active row k */ 444 /* initialize lnk by the column indices of row rip[k] of A */ 445 nzk = 0; 446 ncols = ai[rip[k] + 1] - ai[rip[k]]; 447 for (j = 0; j < ncols; j++) { 448 i = *(aj + ai[rip[k]] + j); 449 cols[j] = rip[i]; 450 } 451 PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt)); 452 nzk += nlnk; 453 454 /* update lnk by computing fill-in for each pivot row to be merged in */ 455 prow = jl[k]; /* 1st pivot row */ 456 457 while (prow < k) { 458 nextprow = jl[prow]; 459 /* merge prow into k-th row */ 460 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 461 jmax = ui[prow + 1]; 462 ncols = jmax - jmin; 463 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 464 PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt)); 465 nzk += nlnk; 466 467 /* update il and jl for prow */ 468 if (jmin < jmax) { 469 il[prow] = jmin; 470 471 j = *uj_ptr; 472 jl[prow] = jl[j]; 473 jl[j] = prow; 474 } 475 prow = nextprow; 476 } 477 478 /* if free space is not available, make more free space */ 479 if (current_space->local_remaining < nzk) { 480 i = mbs - k + 1; /* num of unfactored rows */ 481 i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 482 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 483 reallocs++; 484 } 485 486 /* copy data into free space, then initialize lnk */ 487 PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt)); 488 489 /* add the k-th row into il and jl */ 490 if (nzk - 1 > 0) { 491 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 492 jl[k] = jl[i]; 493 jl[i] = k; 494 il[k] = ui[k] + 1; 495 } 496 ui_ptr[k] = current_space->array; 497 498 current_space->array += nzk; 499 current_space->local_used += nzk; 500 current_space->local_remaining -= nzk; 501 502 ui[k + 1] = ui[k] + nzk; 503 } 504 505 PetscCall(ISRestoreIndices(perm, &rip)); 506 PetscCall(PetscFree4(ui_ptr, il, jl, cols)); 507 508 /* destroy list of free space and other temporary array(s) */ 509 PetscCall(PetscMalloc1(ui[mbs] + 1, &uj)); 510 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 511 PetscCall(PetscLLDestroy(lnk, lnkbt)); 512 513 /* put together the new matrix in MATSEQSBAIJ format */ 514 PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL)); 515 516 b = (Mat_SeqSBAIJ *)fact->data; 517 b->singlemalloc = PETSC_FALSE; 518 b->free_a = PETSC_TRUE; 519 b->free_ij = PETSC_TRUE; 520 521 PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a)); 522 523 b->j = uj; 524 b->i = ui; 525 b->diag = NULL; 526 b->ilen = NULL; 527 b->imax = NULL; 528 b->row = perm; 529 530 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 531 532 PetscCall(PetscObjectReference((PetscObject)perm)); 533 b->icol = perm; 534 PetscCall(PetscObjectReference((PetscObject)perm)); 535 PetscCall(PetscMalloc1(mbs + 1, &b->solve_work)); 536 b->maxnz = b->nz = ui[mbs]; 537 538 fact->info.factor_mallocs = reallocs; 539 fact->info.fill_ratio_given = fill; 540 if (ai[mbs] != 0) { 541 fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs]; 542 } else { 543 fact->info.fill_ratio_needed = 0.0; 544 } 545 #if defined(PETSC_USE_INFO) 546 if (ai[mbs] != 0) { 547 PetscReal af = fact->info.fill_ratio_needed; 548 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 549 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 550 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 551 } else { 552 PetscCall(PetscInfo(A, "Empty matrix\n")); 553 } 554 #endif 555 PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(fact, perm_identity)); 556 PetscFunctionReturn(PETSC_SUCCESS); 557 } 558 559 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C, Mat A, const MatFactorInfo *info) 560 { 561 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 562 IS perm = b->row; 563 const PetscInt *ai, *aj, *perm_ptr, mbs = a->mbs, *bi = b->i, *bj = b->j; 564 PetscInt i, j; 565 PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 566 PetscInt bs = A->rmap->bs, bs2 = a->bs2; 567 MatScalar *ba = b->a, *aa, *ap, *dk, *uik; 568 MatScalar *u, *diag, *rtmp, *rtmp_ptr; 569 MatScalar *work; 570 PetscInt *pivots; 571 PetscBool allowzeropivot, zeropivotdetected; 572 573 PetscFunctionBegin; 574 /* initialization */ 575 PetscCall(PetscCalloc1(bs2 * mbs, &rtmp)); 576 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 577 allowzeropivot = PetscNot(A->erroriffailure); 578 579 il[0] = 0; 580 for (i = 0; i < mbs; i++) jl[i] = mbs; 581 582 PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work)); 583 PetscCall(PetscMalloc1(bs, &pivots)); 584 585 PetscCall(ISGetIndices(perm, &perm_ptr)); 586 587 /* check permutation */ 588 if (!a->permute) { 589 ai = a->i; 590 aj = a->j; 591 aa = a->a; 592 } else { 593 ai = a->inew; 594 aj = a->jnew; 595 PetscCall(PetscMalloc1(bs2 * ai[mbs], &aa)); 596 PetscCall(PetscArraycpy(aa, a->a, bs2 * ai[mbs])); 597 PetscCall(PetscMalloc1(ai[mbs], &a2anew)); 598 PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs])); 599 600 for (i = 0; i < mbs; i++) { 601 jmin = ai[i]; 602 jmax = ai[i + 1]; 603 for (j = jmin; j < jmax; j++) { 604 while (a2anew[j] != j) { 605 k = a2anew[j]; 606 a2anew[j] = a2anew[k]; 607 a2anew[k] = k; 608 for (k1 = 0; k1 < bs2; k1++) { 609 dk[k1] = aa[k * bs2 + k1]; 610 aa[k * bs2 + k1] = aa[j * bs2 + k1]; 611 aa[j * bs2 + k1] = dk[k1]; 612 } 613 } 614 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 615 if (i > aj[j]) { 616 ap = aa + j * bs2; /* ptr to the beginning of j-th block of aa */ 617 for (k = 0; k < bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 618 for (k = 0; k < bs; k++) { /* j-th block of aa <- dk^T */ 619 for (k1 = 0; k1 < bs; k1++) *ap++ = dk[k + bs * k1]; 620 } 621 } 622 } 623 } 624 PetscCall(PetscFree(a2anew)); 625 } 626 627 /* for each row k */ 628 for (k = 0; k < mbs; k++) { 629 /*initialize k-th row with elements nonzero in row perm(k) of A */ 630 jmin = ai[perm_ptr[k]]; 631 jmax = ai[perm_ptr[k] + 1]; 632 633 ap = aa + jmin * bs2; 634 for (j = jmin; j < jmax; j++) { 635 vj = perm_ptr[aj[j]]; /* block col. index */ 636 rtmp_ptr = rtmp + vj * bs2; 637 for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++; 638 } 639 640 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 641 PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2)); 642 i = jl[k]; /* first row to be added to k_th row */ 643 644 while (i < k) { 645 nexti = jl[i]; /* next row to be added to k_th row */ 646 647 /* compute multiplier */ 648 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 649 650 /* uik = -inv(Di)*U_bar(i,k) */ 651 diag = ba + i * bs2; 652 u = ba + ili * bs2; 653 PetscCall(PetscArrayzero(uik, bs2)); 654 PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u); 655 656 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 657 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u); 658 PetscCall(PetscLogFlops(4.0 * bs * bs2)); 659 660 /* update -U(i,k) */ 661 PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2)); 662 663 /* add multiple of row i to k-th row ... */ 664 jmin = ili + 1; 665 jmax = bi[i + 1]; 666 if (jmin < jmax) { 667 for (j = jmin; j < jmax; j++) { 668 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 669 rtmp_ptr = rtmp + bj[j] * bs2; 670 u = ba + j * bs2; 671 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u); 672 } 673 PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin))); 674 675 /* ... add i to row list for next nonzero entry */ 676 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 677 j = bj[jmin]; 678 jl[i] = jl[j]; 679 jl[j] = i; /* update jl */ 680 } 681 i = nexti; 682 } 683 684 /* save nonzero entries in k-th row of U ... */ 685 686 /* invert diagonal block */ 687 diag = ba + k * bs2; 688 PetscCall(PetscArraycpy(diag, dk, bs2)); 689 690 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected)); 691 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 692 693 jmin = bi[k]; 694 jmax = bi[k + 1]; 695 if (jmin < jmax) { 696 for (j = jmin; j < jmax; j++) { 697 vj = bj[j]; /* block col. index of U */ 698 u = ba + j * bs2; 699 rtmp_ptr = rtmp + vj * bs2; 700 for (k1 = 0; k1 < bs2; k1++) { 701 *u++ = *rtmp_ptr; 702 *rtmp_ptr++ = 0.0; 703 } 704 } 705 706 /* ... add k to row list for first nonzero entry in k-th row */ 707 il[k] = jmin; 708 i = bj[jmin]; 709 jl[k] = jl[i]; 710 jl[i] = k; 711 } 712 } 713 714 PetscCall(PetscFree(rtmp)); 715 PetscCall(PetscFree2(il, jl)); 716 PetscCall(PetscFree3(dk, uik, work)); 717 PetscCall(PetscFree(pivots)); 718 if (a->permute) PetscCall(PetscFree(aa)); 719 720 PetscCall(ISRestoreIndices(perm, &perm_ptr)); 721 722 C->ops->solve = MatSolve_SeqSBAIJ_N_inplace; 723 C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace; 724 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_inplace; 725 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_inplace; 726 727 C->assembled = PETSC_TRUE; 728 C->preallocated = PETSC_TRUE; 729 730 PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */ 731 PetscFunctionReturn(PETSC_SUCCESS); 732 } 733 734 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info) 735 { 736 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 737 PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j; 738 PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 739 PetscInt bs = A->rmap->bs, bs2 = a->bs2; 740 MatScalar *ba = b->a, *aa, *ap, *dk, *uik; 741 MatScalar *u, *diag, *rtmp, *rtmp_ptr; 742 MatScalar *work; 743 PetscInt *pivots; 744 PetscBool allowzeropivot, zeropivotdetected; 745 746 PetscFunctionBegin; 747 PetscCall(PetscCalloc1(bs2 * mbs, &rtmp)); 748 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 749 il[0] = 0; 750 for (i = 0; i < mbs; i++) jl[i] = mbs; 751 752 PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work)); 753 PetscCall(PetscMalloc1(bs, &pivots)); 754 allowzeropivot = PetscNot(A->erroriffailure); 755 756 ai = a->i; 757 aj = a->j; 758 aa = a->a; 759 760 /* for each row k */ 761 for (k = 0; k < mbs; k++) { 762 /*initialize k-th row with elements nonzero in row k of A */ 763 jmin = ai[k]; 764 jmax = ai[k + 1]; 765 ap = aa + jmin * bs2; 766 for (j = jmin; j < jmax; j++) { 767 vj = aj[j]; /* block col. index */ 768 rtmp_ptr = rtmp + vj * bs2; 769 for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++; 770 } 771 772 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 773 PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2)); 774 i = jl[k]; /* first row to be added to k_th row */ 775 776 while (i < k) { 777 nexti = jl[i]; /* next row to be added to k_th row */ 778 779 /* compute multiplier */ 780 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 781 782 /* uik = -inv(Di)*U_bar(i,k) */ 783 diag = ba + i * bs2; 784 u = ba + ili * bs2; 785 PetscCall(PetscArrayzero(uik, bs2)); 786 PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u); 787 788 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 789 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u); 790 PetscCall(PetscLogFlops(2.0 * bs * bs2)); 791 792 /* update -U(i,k) */ 793 PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2)); 794 795 /* add multiple of row i to k-th row ... */ 796 jmin = ili + 1; 797 jmax = bi[i + 1]; 798 if (jmin < jmax) { 799 for (j = jmin; j < jmax; j++) { 800 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 801 rtmp_ptr = rtmp + bj[j] * bs2; 802 u = ba + j * bs2; 803 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u); 804 } 805 PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin))); 806 807 /* ... add i to row list for next nonzero entry */ 808 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 809 j = bj[jmin]; 810 jl[i] = jl[j]; 811 jl[j] = i; /* update jl */ 812 } 813 i = nexti; 814 } 815 816 /* save nonzero entries in k-th row of U ... */ 817 818 /* invert diagonal block */ 819 diag = ba + k * bs2; 820 PetscCall(PetscArraycpy(diag, dk, bs2)); 821 822 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected)); 823 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 824 825 jmin = bi[k]; 826 jmax = bi[k + 1]; 827 if (jmin < jmax) { 828 for (j = jmin; j < jmax; j++) { 829 vj = bj[j]; /* block col. index of U */ 830 u = ba + j * bs2; 831 rtmp_ptr = rtmp + vj * bs2; 832 for (k1 = 0; k1 < bs2; k1++) { 833 *u++ = *rtmp_ptr; 834 *rtmp_ptr++ = 0.0; 835 } 836 } 837 838 /* ... add k to row list for first nonzero entry in k-th row */ 839 il[k] = jmin; 840 i = bj[jmin]; 841 jl[k] = jl[i]; 842 jl[i] = k; 843 } 844 } 845 846 PetscCall(PetscFree(rtmp)); 847 PetscCall(PetscFree2(il, jl)); 848 PetscCall(PetscFree3(dk, uik, work)); 849 PetscCall(PetscFree(pivots)); 850 851 C->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 852 C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 853 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 854 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 855 C->assembled = PETSC_TRUE; 856 C->preallocated = PETSC_TRUE; 857 858 PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */ 859 PetscFunctionReturn(PETSC_SUCCESS); 860 } 861 862 /* 863 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 864 Version for blocks 2 by 2. 865 */ 866 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C, Mat A, const MatFactorInfo *info) 867 { 868 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 869 IS perm = b->row; 870 const PetscInt *ai, *aj, *perm_ptr; 871 PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j; 872 PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 873 MatScalar *ba = b->a, *aa, *ap; 874 MatScalar *u, *diag, *rtmp, *rtmp_ptr, dk[4], uik[4]; 875 PetscReal shift = info->shiftamount; 876 PetscBool allowzeropivot, zeropivotdetected; 877 878 PetscFunctionBegin; 879 allowzeropivot = PetscNot(A->erroriffailure); 880 881 /* initialization */ 882 /* il and jl record the first nonzero element in each row of the accessing 883 window U(0:k, k:mbs-1). 884 jl: list of rows to be added to uneliminated rows 885 i>= k: jl(i) is the first row to be added to row i 886 i< k: jl(i) is the row following row i in some list of rows 887 jl(i) = mbs indicates the end of a list 888 il(i): points to the first nonzero element in columns k,...,mbs-1 of 889 row i of U */ 890 PetscCall(PetscCalloc1(4 * mbs, &rtmp)); 891 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 892 il[0] = 0; 893 for (i = 0; i < mbs; i++) jl[i] = mbs; 894 895 PetscCall(ISGetIndices(perm, &perm_ptr)); 896 897 /* check permutation */ 898 if (!a->permute) { 899 ai = a->i; 900 aj = a->j; 901 aa = a->a; 902 } else { 903 ai = a->inew; 904 aj = a->jnew; 905 PetscCall(PetscMalloc1(4 * ai[mbs], &aa)); 906 PetscCall(PetscArraycpy(aa, a->a, 4 * ai[mbs])); 907 PetscCall(PetscMalloc1(ai[mbs], &a2anew)); 908 PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs])); 909 910 for (i = 0; i < mbs; i++) { 911 jmin = ai[i]; 912 jmax = ai[i + 1]; 913 for (j = jmin; j < jmax; j++) { 914 while (a2anew[j] != j) { 915 k = a2anew[j]; 916 a2anew[j] = a2anew[k]; 917 a2anew[k] = k; 918 for (k1 = 0; k1 < 4; k1++) { 919 dk[k1] = aa[k * 4 + k1]; 920 aa[k * 4 + k1] = aa[j * 4 + k1]; 921 aa[j * 4 + k1] = dk[k1]; 922 } 923 } 924 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 925 if (i > aj[j]) { 926 ap = aa + j * 4; /* ptr to the beginning of the block */ 927 dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 928 ap[1] = ap[2]; 929 ap[2] = dk[1]; 930 } 931 } 932 } 933 PetscCall(PetscFree(a2anew)); 934 } 935 936 /* for each row k */ 937 for (k = 0; k < mbs; k++) { 938 /*initialize k-th row with elements nonzero in row perm(k) of A */ 939 jmin = ai[perm_ptr[k]]; 940 jmax = ai[perm_ptr[k] + 1]; 941 ap = aa + jmin * 4; 942 for (j = jmin; j < jmax; j++) { 943 vj = perm_ptr[aj[j]]; /* block col. index */ 944 rtmp_ptr = rtmp + vj * 4; 945 for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++; 946 } 947 948 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 949 PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4)); 950 i = jl[k]; /* first row to be added to k_th row */ 951 952 while (i < k) { 953 nexti = jl[i]; /* next row to be added to k_th row */ 954 955 /* compute multiplier */ 956 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 957 958 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 959 diag = ba + i * 4; 960 u = ba + ili * 4; 961 uik[0] = -(diag[0] * u[0] + diag[2] * u[1]); 962 uik[1] = -(diag[1] * u[0] + diag[3] * u[1]); 963 uik[2] = -(diag[0] * u[2] + diag[2] * u[3]); 964 uik[3] = -(diag[1] * u[2] + diag[3] * u[3]); 965 966 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 967 dk[0] += uik[0] * u[0] + uik[1] * u[1]; 968 dk[1] += uik[2] * u[0] + uik[3] * u[1]; 969 dk[2] += uik[0] * u[2] + uik[1] * u[3]; 970 dk[3] += uik[2] * u[2] + uik[3] * u[3]; 971 972 PetscCall(PetscLogFlops(16.0 * 2.0)); 973 974 /* update -U(i,k): ba[ili] = uik */ 975 PetscCall(PetscArraycpy(ba + ili * 4, uik, 4)); 976 977 /* add multiple of row i to k-th row ... */ 978 jmin = ili + 1; 979 jmax = bi[i + 1]; 980 if (jmin < jmax) { 981 for (j = jmin; j < jmax; j++) { 982 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 983 rtmp_ptr = rtmp + bj[j] * 4; 984 u = ba + j * 4; 985 rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1]; 986 rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1]; 987 rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3]; 988 rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3]; 989 } 990 PetscCall(PetscLogFlops(16.0 * (jmax - jmin))); 991 992 /* ... add i to row list for next nonzero entry */ 993 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 994 j = bj[jmin]; 995 jl[i] = jl[j]; 996 jl[j] = i; /* update jl */ 997 } 998 i = nexti; 999 } 1000 1001 /* save nonzero entries in k-th row of U ... */ 1002 1003 /* invert diagonal block */ 1004 diag = ba + k * 4; 1005 PetscCall(PetscArraycpy(diag, dk, 4)); 1006 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 1007 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1008 1009 jmin = bi[k]; 1010 jmax = bi[k + 1]; 1011 if (jmin < jmax) { 1012 for (j = jmin; j < jmax; j++) { 1013 vj = bj[j]; /* block col. index of U */ 1014 u = ba + j * 4; 1015 rtmp_ptr = rtmp + vj * 4; 1016 for (k1 = 0; k1 < 4; k1++) { 1017 *u++ = *rtmp_ptr; 1018 *rtmp_ptr++ = 0.0; 1019 } 1020 } 1021 1022 /* ... add k to row list for first nonzero entry in k-th row */ 1023 il[k] = jmin; 1024 i = bj[jmin]; 1025 jl[k] = jl[i]; 1026 jl[i] = k; 1027 } 1028 } 1029 1030 PetscCall(PetscFree(rtmp)); 1031 PetscCall(PetscFree2(il, jl)); 1032 if (a->permute) PetscCall(PetscFree(aa)); 1033 PetscCall(ISRestoreIndices(perm, &perm_ptr)); 1034 1035 C->ops->solve = MatSolve_SeqSBAIJ_2_inplace; 1036 C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace; 1037 C->assembled = PETSC_TRUE; 1038 C->preallocated = PETSC_TRUE; 1039 1040 PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */ 1041 PetscFunctionReturn(PETSC_SUCCESS); 1042 } 1043 1044 /* 1045 Version for when blocks are 2 by 2 Using natural ordering 1046 */ 1047 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info) 1048 { 1049 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 1050 PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j; 1051 PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 1052 MatScalar *ba = b->a, *aa, *ap, dk[8], uik[8]; 1053 MatScalar *u, *diag, *rtmp, *rtmp_ptr; 1054 PetscReal shift = info->shiftamount; 1055 PetscBool allowzeropivot, zeropivotdetected; 1056 1057 PetscFunctionBegin; 1058 allowzeropivot = PetscNot(A->erroriffailure); 1059 1060 /* initialization */ 1061 /* il and jl record the first nonzero element in each row of the accessing 1062 window U(0:k, k:mbs-1). 1063 jl: list of rows to be added to uneliminated rows 1064 i>= k: jl(i) is the first row to be added to row i 1065 i< k: jl(i) is the row following row i in some list of rows 1066 jl(i) = mbs indicates the end of a list 1067 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1068 row i of U */ 1069 PetscCall(PetscCalloc1(4 * mbs, &rtmp)); 1070 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 1071 il[0] = 0; 1072 for (i = 0; i < mbs; i++) jl[i] = mbs; 1073 1074 ai = a->i; 1075 aj = a->j; 1076 aa = a->a; 1077 1078 /* for each row k */ 1079 for (k = 0; k < mbs; k++) { 1080 /*initialize k-th row with elements nonzero in row k of A */ 1081 jmin = ai[k]; 1082 jmax = ai[k + 1]; 1083 ap = aa + jmin * 4; 1084 for (j = jmin; j < jmax; j++) { 1085 vj = aj[j]; /* block col. index */ 1086 rtmp_ptr = rtmp + vj * 4; 1087 for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++; 1088 } 1089 1090 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1091 PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4)); 1092 i = jl[k]; /* first row to be added to k_th row */ 1093 1094 while (i < k) { 1095 nexti = jl[i]; /* next row to be added to k_th row */ 1096 1097 /* compute multiplier */ 1098 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1099 1100 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 1101 diag = ba + i * 4; 1102 u = ba + ili * 4; 1103 uik[0] = -(diag[0] * u[0] + diag[2] * u[1]); 1104 uik[1] = -(diag[1] * u[0] + diag[3] * u[1]); 1105 uik[2] = -(diag[0] * u[2] + diag[2] * u[3]); 1106 uik[3] = -(diag[1] * u[2] + diag[3] * u[3]); 1107 1108 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 1109 dk[0] += uik[0] * u[0] + uik[1] * u[1]; 1110 dk[1] += uik[2] * u[0] + uik[3] * u[1]; 1111 dk[2] += uik[0] * u[2] + uik[1] * u[3]; 1112 dk[3] += uik[2] * u[2] + uik[3] * u[3]; 1113 1114 PetscCall(PetscLogFlops(16.0 * 2.0)); 1115 1116 /* update -U(i,k): ba[ili] = uik */ 1117 PetscCall(PetscArraycpy(ba + ili * 4, uik, 4)); 1118 1119 /* add multiple of row i to k-th row ... */ 1120 jmin = ili + 1; 1121 jmax = bi[i + 1]; 1122 if (jmin < jmax) { 1123 for (j = jmin; j < jmax; j++) { 1124 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 1125 rtmp_ptr = rtmp + bj[j] * 4; 1126 u = ba + j * 4; 1127 rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1]; 1128 rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1]; 1129 rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3]; 1130 rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3]; 1131 } 1132 PetscCall(PetscLogFlops(16.0 * (jmax - jmin))); 1133 1134 /* ... add i to row list for next nonzero entry */ 1135 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1136 j = bj[jmin]; 1137 jl[i] = jl[j]; 1138 jl[j] = i; /* update jl */ 1139 } 1140 i = nexti; 1141 } 1142 1143 /* save nonzero entries in k-th row of U ... */ 1144 1145 /* invert diagonal block */ 1146 diag = ba + k * 4; 1147 PetscCall(PetscArraycpy(diag, dk, 4)); 1148 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 1149 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1150 1151 jmin = bi[k]; 1152 jmax = bi[k + 1]; 1153 if (jmin < jmax) { 1154 for (j = jmin; j < jmax; j++) { 1155 vj = bj[j]; /* block col. index of U */ 1156 u = ba + j * 4; 1157 rtmp_ptr = rtmp + vj * 4; 1158 for (k1 = 0; k1 < 4; k1++) { 1159 *u++ = *rtmp_ptr; 1160 *rtmp_ptr++ = 0.0; 1161 } 1162 } 1163 1164 /* ... add k to row list for first nonzero entry in k-th row */ 1165 il[k] = jmin; 1166 i = bj[jmin]; 1167 jl[k] = jl[i]; 1168 jl[i] = k; 1169 } 1170 } 1171 1172 PetscCall(PetscFree(rtmp)); 1173 PetscCall(PetscFree2(il, jl)); 1174 1175 C->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1176 C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1177 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1178 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1179 C->assembled = PETSC_TRUE; 1180 C->preallocated = PETSC_TRUE; 1181 1182 PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */ 1183 PetscFunctionReturn(PETSC_SUCCESS); 1184 } 1185 1186 /* 1187 Numeric U^T*D*U factorization for SBAIJ format. 1188 Version for blocks are 1 by 1. 1189 */ 1190 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info) 1191 { 1192 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 1193 IS ip = b->row; 1194 const PetscInt *ai, *aj, *rip; 1195 PetscInt *a2anew, i, j, mbs = a->mbs, *bi = b->i, *bj = b->j, *bcol; 1196 PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 1197 MatScalar *rtmp, *ba = b->a, *bval, *aa, dk, uikdi; 1198 PetscReal rs; 1199 FactorShiftCtx sctx; 1200 1201 PetscFunctionBegin; 1202 /* MatPivotSetUp(): initialize shift context sctx */ 1203 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1204 1205 PetscCall(ISGetIndices(ip, &rip)); 1206 if (!a->permute) { 1207 ai = a->i; 1208 aj = a->j; 1209 aa = a->a; 1210 } else { 1211 ai = a->inew; 1212 aj = a->jnew; 1213 nz = ai[mbs]; 1214 PetscCall(PetscMalloc1(nz, &aa)); 1215 a2anew = a->a2anew; 1216 bval = a->a; 1217 for (j = 0; j < nz; j++) aa[a2anew[j]] = *(bval++); 1218 } 1219 1220 /* initialization */ 1221 /* il and jl record the first nonzero element in each row of the accessing 1222 window U(0:k, k:mbs-1). 1223 jl: list of rows to be added to uneliminated rows 1224 i>= k: jl(i) is the first row to be added to row i 1225 i< k: jl(i) is the row following row i in some list of rows 1226 jl(i) = mbs indicates the end of a list 1227 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1228 row i of U */ 1229 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 1230 1231 do { 1232 sctx.newshift = PETSC_FALSE; 1233 il[0] = 0; 1234 for (i = 0; i < mbs; i++) { 1235 rtmp[i] = 0.0; 1236 jl[i] = mbs; 1237 } 1238 1239 for (k = 0; k < mbs; k++) { 1240 /*initialize k-th row by the perm[k]-th row of A */ 1241 jmin = ai[rip[k]]; 1242 jmax = ai[rip[k] + 1]; 1243 bval = ba + bi[k]; 1244 for (j = jmin; j < jmax; j++) { 1245 col = rip[aj[j]]; 1246 rtmp[col] = aa[j]; 1247 *bval++ = 0.0; /* for in-place factorization */ 1248 } 1249 1250 /* shift the diagonal of the matrix */ 1251 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1252 1253 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1254 dk = rtmp[k]; 1255 i = jl[k]; /* first row to be added to k_th row */ 1256 1257 while (i < k) { 1258 nexti = jl[i]; /* next row to be added to k_th row */ 1259 1260 /* compute multiplier, update diag(k) and U(i,k) */ 1261 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1262 uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 1263 dk += uikdi * ba[ili]; 1264 ba[ili] = uikdi; /* -U(i,k) */ 1265 1266 /* add multiple of row i to k-th row */ 1267 jmin = ili + 1; 1268 jmax = bi[i + 1]; 1269 if (jmin < jmax) { 1270 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 1271 PetscCall(PetscLogFlops(2.0 * (jmax - jmin))); 1272 1273 /* update il and jl for row i */ 1274 il[i] = jmin; 1275 j = bj[jmin]; 1276 jl[i] = jl[j]; 1277 jl[j] = i; 1278 } 1279 i = nexti; 1280 } 1281 1282 /* shift the diagonals when zero pivot is detected */ 1283 /* compute rs=sum of abs(off-diagonal) */ 1284 rs = 0.0; 1285 jmin = bi[k] + 1; 1286 nz = bi[k + 1] - jmin; 1287 if (nz) { 1288 bcol = bj + jmin; 1289 while (nz--) { 1290 rs += PetscAbsScalar(rtmp[*bcol]); 1291 bcol++; 1292 } 1293 } 1294 1295 sctx.rs = rs; 1296 sctx.pv = dk; 1297 PetscCall(MatPivotCheck(C, A, info, &sctx, k)); 1298 if (sctx.newshift) break; /* sctx.shift_amount is updated */ 1299 dk = sctx.pv; 1300 1301 /* copy data into U(k,:) */ 1302 ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 1303 jmin = bi[k] + 1; 1304 jmax = bi[k + 1]; 1305 if (jmin < jmax) { 1306 for (j = jmin; j < jmax; j++) { 1307 col = bj[j]; 1308 ba[j] = rtmp[col]; 1309 rtmp[col] = 0.0; 1310 } 1311 /* add the k-th row into il and jl */ 1312 il[k] = jmin; 1313 i = bj[jmin]; 1314 jl[k] = jl[i]; 1315 jl[i] = k; 1316 } 1317 } 1318 } while (sctx.newshift); 1319 PetscCall(PetscFree3(rtmp, il, jl)); 1320 if (a->permute) PetscCall(PetscFree(aa)); 1321 1322 PetscCall(ISRestoreIndices(ip, &rip)); 1323 1324 C->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 1325 C->ops->solves = MatSolves_SeqSBAIJ_1_inplace; 1326 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 1327 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 1328 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 1329 C->assembled = PETSC_TRUE; 1330 C->preallocated = PETSC_TRUE; 1331 1332 PetscCall(PetscLogFlops(C->rmap->N)); 1333 if (sctx.nshift) { 1334 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1335 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1336 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1337 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1338 } 1339 } 1340 PetscFunctionReturn(PETSC_SUCCESS); 1341 } 1342 1343 /* 1344 Version for when blocks are 1 by 1 Using natural ordering under new datastructure 1345 Modified from MatCholeskyFactorNumeric_SeqAIJ() 1346 */ 1347 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info) 1348 { 1349 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1350 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)B->data; 1351 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp; 1352 PetscInt *ai = a->i, *aj = a->j, *ajtmp; 1353 PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz; 1354 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 1355 FactorShiftCtx sctx; 1356 PetscReal rs; 1357 MatScalar d, *v; 1358 1359 PetscFunctionBegin; 1360 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r)); 1361 1362 /* MatPivotSetUp(): initialize shift context sctx */ 1363 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1364 1365 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 1366 sctx.shift_top = info->zeropivot; 1367 1368 PetscCall(PetscArrayzero(rtmp, mbs)); 1369 1370 for (i = 0; i < mbs; i++) { 1371 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 1372 d = (aa)[a->diag[i]]; 1373 rtmp[i] += -PetscRealPart(d); /* diagonal entry */ 1374 ajtmp = aj + ai[i] + 1; /* exclude diagonal */ 1375 v = aa + ai[i] + 1; 1376 nz = ai[i + 1] - ai[i] - 1; 1377 for (j = 0; j < nz; j++) { 1378 rtmp[i] += PetscAbsScalar(v[j]); 1379 rtmp[ajtmp[j]] += PetscAbsScalar(v[j]); 1380 } 1381 if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]); 1382 } 1383 sctx.shift_top *= 1.1; 1384 sctx.nshift_max = 5; 1385 sctx.shift_lo = 0.; 1386 sctx.shift_hi = 1.; 1387 } 1388 1389 /* allocate working arrays 1390 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 1391 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 1392 */ 1393 do { 1394 sctx.newshift = PETSC_FALSE; 1395 1396 for (i = 0; i < mbs; i++) c2r[i] = mbs; 1397 if (mbs) il[0] = 0; 1398 1399 for (k = 0; k < mbs; k++) { 1400 /* zero rtmp */ 1401 nz = bi[k + 1] - bi[k]; 1402 bjtmp = bj + bi[k]; 1403 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 1404 1405 /* load in initial unfactored row */ 1406 bval = ba + bi[k]; 1407 jmin = ai[k]; 1408 jmax = ai[k + 1]; 1409 for (j = jmin; j < jmax; j++) { 1410 col = aj[j]; 1411 rtmp[col] = aa[j]; 1412 *bval++ = 0.0; /* for in-place factorization */ 1413 } 1414 /* shift the diagonal of the matrix: ZeropivotApply() */ 1415 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 1416 1417 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1418 dk = rtmp[k]; 1419 i = c2r[k]; /* first row to be added to k_th row */ 1420 1421 while (i < k) { 1422 nexti = c2r[i]; /* next row to be added to k_th row */ 1423 1424 /* compute multiplier, update diag(k) and U(i,k) */ 1425 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1426 uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */ 1427 dk += uikdi * ba[ili]; /* update diag[k] */ 1428 ba[ili] = uikdi; /* -U(i,k) */ 1429 1430 /* add multiple of row i to k-th row */ 1431 jmin = ili + 1; 1432 jmax = bi[i + 1]; 1433 if (jmin < jmax) { 1434 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 1435 /* update il and c2r for row i */ 1436 il[i] = jmin; 1437 j = bj[jmin]; 1438 c2r[i] = c2r[j]; 1439 c2r[j] = i; 1440 } 1441 i = nexti; 1442 } 1443 1444 /* copy data into U(k,:) */ 1445 rs = 0.0; 1446 jmin = bi[k]; 1447 jmax = bi[k + 1] - 1; 1448 if (jmin < jmax) { 1449 for (j = jmin; j < jmax; j++) { 1450 col = bj[j]; 1451 ba[j] = rtmp[col]; 1452 rs += PetscAbsScalar(ba[j]); 1453 } 1454 /* add the k-th row into il and c2r */ 1455 il[k] = jmin; 1456 i = bj[jmin]; 1457 c2r[k] = c2r[i]; 1458 c2r[i] = k; 1459 } 1460 1461 sctx.rs = rs; 1462 sctx.pv = dk; 1463 PetscCall(MatPivotCheck(B, A, info, &sctx, k)); 1464 if (sctx.newshift) break; 1465 dk = sctx.pv; 1466 1467 ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */ 1468 } 1469 } while (sctx.newshift); 1470 1471 PetscCall(PetscFree3(rtmp, il, c2r)); 1472 1473 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1474 B->ops->solves = MatSolves_SeqSBAIJ_1; 1475 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1476 B->ops->matsolve = MatMatSolve_SeqSBAIJ_1_NaturalOrdering; 1477 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1478 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1479 1480 B->assembled = PETSC_TRUE; 1481 B->preallocated = PETSC_TRUE; 1482 1483 PetscCall(PetscLogFlops(B->rmap->n)); 1484 1485 /* MatPivotView() */ 1486 if (sctx.nshift) { 1487 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1488 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 1489 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1490 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1491 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 1492 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 1493 } 1494 } 1495 PetscFunctionReturn(PETSC_SUCCESS); 1496 } 1497 1498 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info) 1499 { 1500 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 1501 PetscInt i, j, mbs = a->mbs; 1502 PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 1503 PetscInt k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz; 1504 MatScalar *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval; 1505 PetscReal rs; 1506 FactorShiftCtx sctx; 1507 1508 PetscFunctionBegin; 1509 /* MatPivotSetUp(): initialize shift context sctx */ 1510 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1511 1512 /* initialization */ 1513 /* il and jl record the first nonzero element in each row of the accessing 1514 window U(0:k, k:mbs-1). 1515 jl: list of rows to be added to uneliminated rows 1516 i>= k: jl(i) is the first row to be added to row i 1517 i< k: jl(i) is the row following row i in some list of rows 1518 jl(i) = mbs indicates the end of a list 1519 il(i): points to the first nonzero element in U(i,k:mbs-1) 1520 */ 1521 PetscCall(PetscMalloc1(mbs, &rtmp)); 1522 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 1523 1524 do { 1525 sctx.newshift = PETSC_FALSE; 1526 il[0] = 0; 1527 for (i = 0; i < mbs; i++) { 1528 rtmp[i] = 0.0; 1529 jl[i] = mbs; 1530 } 1531 1532 for (k = 0; k < mbs; k++) { 1533 /*initialize k-th row with elements nonzero in row perm(k) of A */ 1534 nz = ai[k + 1] - ai[k]; 1535 acol = aj + ai[k]; 1536 aval = aa + ai[k]; 1537 bval = ba + bi[k]; 1538 while (nz--) { 1539 rtmp[*acol++] = *aval++; 1540 *bval++ = 0.0; /* for in-place factorization */ 1541 } 1542 1543 /* shift the diagonal of the matrix */ 1544 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1545 1546 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1547 dk = rtmp[k]; 1548 i = jl[k]; /* first row to be added to k_th row */ 1549 1550 while (i < k) { 1551 nexti = jl[i]; /* next row to be added to k_th row */ 1552 /* compute multiplier, update D(k) and U(i,k) */ 1553 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1554 uikdi = -ba[ili] * ba[bi[i]]; 1555 dk += uikdi * ba[ili]; 1556 ba[ili] = uikdi; /* -U(i,k) */ 1557 1558 /* add multiple of row i to k-th row ... */ 1559 jmin = ili + 1; 1560 nz = bi[i + 1] - jmin; 1561 if (nz > 0) { 1562 bcol = bj + jmin; 1563 bval = ba + jmin; 1564 PetscCall(PetscLogFlops(2.0 * nz)); 1565 while (nz--) rtmp[*bcol++] += uikdi * (*bval++); 1566 1567 /* update il and jl for i-th row */ 1568 il[i] = jmin; 1569 j = bj[jmin]; 1570 jl[i] = jl[j]; 1571 jl[j] = i; 1572 } 1573 i = nexti; 1574 } 1575 1576 /* shift the diagonals when zero pivot is detected */ 1577 /* compute rs=sum of abs(off-diagonal) */ 1578 rs = 0.0; 1579 jmin = bi[k] + 1; 1580 nz = bi[k + 1] - jmin; 1581 if (nz) { 1582 bcol = bj + jmin; 1583 while (nz--) { 1584 rs += PetscAbsScalar(rtmp[*bcol]); 1585 bcol++; 1586 } 1587 } 1588 1589 sctx.rs = rs; 1590 sctx.pv = dk; 1591 PetscCall(MatPivotCheck(C, A, info, &sctx, k)); 1592 if (sctx.newshift) break; /* sctx.shift_amount is updated */ 1593 dk = sctx.pv; 1594 1595 /* copy data into U(k,:) */ 1596 ba[bi[k]] = 1.0 / dk; 1597 jmin = bi[k] + 1; 1598 nz = bi[k + 1] - jmin; 1599 if (nz) { 1600 bcol = bj + jmin; 1601 bval = ba + jmin; 1602 while (nz--) { 1603 *bval++ = rtmp[*bcol]; 1604 rtmp[*bcol++] = 0.0; 1605 } 1606 /* add k-th row into il and jl */ 1607 il[k] = jmin; 1608 i = bj[jmin]; 1609 jl[k] = jl[i]; 1610 jl[i] = k; 1611 } 1612 } /* end of for (k = 0; k<mbs; k++) */ 1613 } while (sctx.newshift); 1614 PetscCall(PetscFree(rtmp)); 1615 PetscCall(PetscFree2(il, jl)); 1616 1617 C->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1618 C->ops->solves = MatSolves_SeqSBAIJ_1_inplace; 1619 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1620 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1621 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1622 1623 C->assembled = PETSC_TRUE; 1624 C->preallocated = PETSC_TRUE; 1625 1626 PetscCall(PetscLogFlops(C->rmap->N)); 1627 if (sctx.nshift) { 1628 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1629 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1630 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1631 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1632 } 1633 } 1634 PetscFunctionReturn(PETSC_SUCCESS); 1635 } 1636 1637 PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A, IS perm, const MatFactorInfo *info) 1638 { 1639 Mat C; 1640 1641 PetscFunctionBegin; 1642 PetscCall(MatGetFactor(A, "petsc", MAT_FACTOR_CHOLESKY, &C)); 1643 PetscCall(MatCholeskyFactorSymbolic(C, A, perm, info)); 1644 PetscCall(MatCholeskyFactorNumeric(C, A, info)); 1645 1646 A->ops->solve = C->ops->solve; 1647 A->ops->solvetranspose = C->ops->solvetranspose; 1648 1649 PetscCall(MatHeaderMerge(A, &C)); 1650 PetscFunctionReturn(PETSC_SUCCESS); 1651 } 1652