1 #include <../src/mat/impls/baij/seq/baij.h> 2 #include <../src/mat/impls/sbaij/seq/sbaij.h> 3 #include <petsc/private/kernels/blockinvert.h> 4 #include <petscis.h> 5 6 PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 7 { 8 Mat_SeqSBAIJ *fact = (Mat_SeqSBAIJ *)F->data; 9 MatScalar *dd = fact->a; 10 PetscInt mbs = fact->mbs, bs = F->rmap->bs, i, nneg_tmp, npos_tmp, *fi = fact->diag; 11 12 PetscFunctionBegin; 13 PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for bs: %" PetscInt_FMT " >1 yet", bs); 14 15 nneg_tmp = 0; 16 npos_tmp = 0; 17 for (i = 0; i < mbs; i++) { 18 if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++; 19 else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++; 20 fi++; 21 } 22 if (nneg) *nneg = nneg_tmp; 23 if (npos) *npos = npos_tmp; 24 if (nzero) *nzero = mbs - nneg_tmp - npos_tmp; 25 PetscFunctionReturn(PETSC_SUCCESS); 26 } 27 28 /* 29 Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 30 Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad. 31 */ 32 static PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F, Mat A, IS perm, const MatFactorInfo *info) 33 { 34 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b; 35 const PetscInt *rip, *ai, *aj; 36 PetscInt i, mbs = a->mbs, *jutmp, bs = A->rmap->bs, bs2 = a->bs2; 37 PetscInt m, reallocs = 0, prow; 38 PetscInt *jl, *q, jmin, jmax, juidx, nzk, qm, *iu, *ju, k, j, vj, umax, maxadd; 39 PetscReal f = info->fill; 40 PetscBool perm_identity; 41 42 PetscFunctionBegin; 43 /* check whether perm is the identity mapping */ 44 PetscCall(ISIdentity(perm, &perm_identity)); 45 PetscCall(ISGetIndices(perm, &rip)); 46 47 if (perm_identity) { /* without permutation */ 48 a->permute = PETSC_FALSE; 49 50 ai = a->i; 51 aj = a->j; 52 } else { /* non-trivial permutation */ 53 a->permute = PETSC_TRUE; 54 55 PetscCall(MatReorderingSeqSBAIJ(A, perm)); 56 57 ai = a->inew; 58 aj = a->jnew; 59 } 60 61 /* initialization */ 62 PetscCall(PetscMalloc1(mbs + 1, &iu)); 63 umax = (PetscInt)(f * ai[mbs] + 1); 64 umax += mbs + 1; 65 PetscCall(PetscMalloc1(umax, &ju)); 66 iu[0] = mbs + 1; 67 juidx = mbs + 1; /* index for ju */ 68 /* jl linked list for pivot row -- linked list for col index */ 69 PetscCall(PetscMalloc2(mbs, &jl, mbs, &q)); 70 for (i = 0; i < mbs; i++) { 71 jl[i] = mbs; 72 q[i] = 0; 73 } 74 75 /* for each row k */ 76 for (k = 0; k < mbs; k++) { 77 for (i = 0; i < mbs; i++) q[i] = 0; /* to be removed! */ 78 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 79 q[k] = mbs; 80 /* initialize nonzero structure of k-th row to row rip[k] of A */ 81 jmin = ai[rip[k]] + 1; /* exclude diag[k] */ 82 jmax = ai[rip[k] + 1]; 83 for (j = jmin; j < jmax; j++) { 84 vj = rip[aj[j]]; /* col. value */ 85 if (vj > k) { 86 qm = k; 87 do { 88 m = qm; 89 qm = q[m]; 90 } while (qm < vj); 91 PetscCheck(qm != vj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Duplicate entry in A"); 92 nzk++; 93 q[m] = vj; 94 q[vj] = qm; 95 } /* if (vj > k) */ 96 } /* for (j=jmin; j<jmax; j++) */ 97 98 /* modify nonzero structure of k-th row by computing fill-in 99 for each row i to be merged in */ 100 prow = k; 101 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 102 103 while (prow < k) { 104 /* merge row prow into k-th row */ 105 jmin = iu[prow] + 1; 106 jmax = iu[prow + 1]; 107 qm = k; 108 for (j = jmin; j < jmax; j++) { 109 vj = ju[j]; 110 do { 111 m = qm; 112 qm = q[m]; 113 } while (qm < vj); 114 if (qm != vj) { 115 nzk++; 116 q[m] = vj; 117 q[vj] = qm; 118 qm = vj; 119 } 120 } 121 prow = jl[prow]; /* next pivot row */ 122 } 123 124 /* add k to row list for first nonzero element in k-th row */ 125 if (nzk > 0) { 126 i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 127 jl[k] = jl[i]; 128 jl[i] = k; 129 } 130 iu[k + 1] = iu[k] + nzk; 131 132 /* allocate more space to ju if needed */ 133 if (iu[k + 1] > umax) { 134 /* estimate how much additional space we will need */ 135 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 136 /* just double the memory each time */ 137 maxadd = umax; 138 if (maxadd < nzk) maxadd = (mbs - k) * (nzk + 1) / 2; 139 umax += maxadd; 140 141 /* allocate a longer ju */ 142 PetscCall(PetscMalloc1(umax, &jutmp)); 143 PetscCall(PetscArraycpy(jutmp, ju, iu[k])); 144 PetscCall(PetscFree(ju)); 145 ju = jutmp; 146 reallocs++; /* count how many times we realloc */ 147 } 148 149 /* save nonzero structure of k-th row in ju */ 150 i = k; 151 while (nzk--) { 152 i = q[i]; 153 ju[juidx++] = i; 154 } 155 } 156 157 #if defined(PETSC_USE_INFO) 158 if (ai[mbs] != 0) { 159 PetscReal af = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]); 160 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 161 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 162 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 163 PetscCall(PetscInfo(A, "for best performance.\n")); 164 } else { 165 PetscCall(PetscInfo(A, "Empty matrix\n")); 166 } 167 #endif 168 169 PetscCall(ISRestoreIndices(perm, &rip)); 170 PetscCall(PetscFree2(jl, q)); 171 172 /* put together the new matrix */ 173 PetscCall(MatSeqSBAIJSetPreallocation(F, bs, MAT_SKIP_ALLOCATION, NULL)); 174 175 b = (Mat_SeqSBAIJ *)(F)->data; 176 b->singlemalloc = PETSC_FALSE; 177 b->free_a = PETSC_TRUE; 178 b->free_ij = PETSC_TRUE; 179 180 PetscCall(PetscMalloc1((iu[mbs] + 1) * bs2, &b->a)); 181 b->j = ju; 182 b->i = iu; 183 b->diag = NULL; 184 b->ilen = NULL; 185 b->imax = NULL; 186 b->row = perm; 187 188 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 189 190 PetscCall(PetscObjectReference((PetscObject)perm)); 191 192 b->icol = perm; 193 PetscCall(PetscObjectReference((PetscObject)perm)); 194 PetscCall(PetscMalloc1(bs * mbs + bs, &b->solve_work)); 195 /* In b structure: Free imax, ilen, old a, old j. 196 Allocate idnew, solve_work, new a, new j */ 197 b->maxnz = b->nz = iu[mbs]; 198 199 (F)->info.factor_mallocs = reallocs; 200 (F)->info.fill_ratio_given = f; 201 if (ai[mbs] != 0) { 202 (F)->info.fill_ratio_needed = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]); 203 } else { 204 (F)->info.fill_ratio_needed = 0.0; 205 } 206 PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(F, perm_identity)); 207 PetscFunctionReturn(PETSC_SUCCESS); 208 } 209 /* 210 Symbolic U^T*D*U factorization for SBAIJ format. 211 See MatICCFactorSymbolic_SeqAIJ() for description of its data structure. 212 */ 213 #include <petscbt.h> 214 #include <../src/mat/utils/freespace.h> 215 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 216 { 217 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 218 Mat_SeqSBAIJ *b; 219 PetscBool perm_identity, missing; 220 PetscReal fill = info->fill; 221 const PetscInt *rip, *ai = a->i, *aj = a->j; 222 PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow; 223 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 224 PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr, *udiag; 225 PetscFreeSpaceList free_space = NULL, current_space = NULL; 226 PetscBT lnkbt; 227 228 PetscFunctionBegin; 229 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 230 PetscCall(MatMissingDiagonal(A, &missing, &i)); 231 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 232 if (bs > 1) { 233 PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact, A, perm, info)); 234 PetscFunctionReturn(PETSC_SUCCESS); 235 } 236 237 /* check whether perm is the identity mapping */ 238 PetscCall(ISIdentity(perm, &perm_identity)); 239 PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format"); 240 a->permute = PETSC_FALSE; 241 PetscCall(ISGetIndices(perm, &rip)); 242 243 /* initialization */ 244 PetscCall(PetscMalloc1(mbs + 1, &ui)); 245 PetscCall(PetscMalloc1(mbs + 1, &udiag)); 246 ui[0] = 0; 247 248 /* jl: linked list for storing indices of the pivot rows 249 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 250 PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols)); 251 for (i = 0; i < mbs; i++) { 252 jl[i] = mbs; 253 il[i] = 0; 254 } 255 256 /* create and initialize a linked list for storing column indices of the active row k */ 257 nlnk = mbs + 1; 258 PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt)); 259 260 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 261 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space)); 262 current_space = free_space; 263 264 for (k = 0; k < mbs; k++) { /* for each active row k */ 265 /* initialize lnk by the column indices of row rip[k] of A */ 266 nzk = 0; 267 ncols = ai[k + 1] - ai[k]; 268 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row %" PetscInt_FMT " in matrix ", k); 269 for (j = 0; j < ncols; j++) { 270 i = *(aj + ai[k] + j); 271 cols[j] = i; 272 } 273 PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt)); 274 nzk += nlnk; 275 276 /* update lnk by computing fill-in for each pivot row to be merged in */ 277 prow = jl[k]; /* 1st pivot row */ 278 279 while (prow < k) { 280 nextprow = jl[prow]; 281 /* merge prow into k-th row */ 282 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 283 jmax = ui[prow + 1]; 284 ncols = jmax - jmin; 285 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 286 PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt)); 287 nzk += nlnk; 288 289 /* update il and jl for prow */ 290 if (jmin < jmax) { 291 il[prow] = jmin; 292 j = *uj_ptr; 293 jl[prow] = jl[j]; 294 jl[j] = prow; 295 } 296 prow = nextprow; 297 } 298 299 /* if free space is not available, make more free space */ 300 if (current_space->local_remaining < nzk) { 301 i = mbs - k + 1; /* num of unfactored rows */ 302 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 303 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 304 reallocs++; 305 } 306 307 /* copy data into free space, then initialize lnk */ 308 PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt)); 309 310 /* add the k-th row into il and jl */ 311 if (nzk > 1) { 312 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 313 jl[k] = jl[i]; 314 jl[i] = k; 315 il[k] = ui[k] + 1; 316 } 317 ui_ptr[k] = current_space->array; 318 319 current_space->array += nzk; 320 current_space->local_used += nzk; 321 current_space->local_remaining -= nzk; 322 323 ui[k + 1] = ui[k] + nzk; 324 } 325 326 PetscCall(ISRestoreIndices(perm, &rip)); 327 PetscCall(PetscFree4(ui_ptr, il, jl, cols)); 328 329 /* destroy list of free space and other temporary array(s) */ 330 PetscCall(PetscMalloc1(ui[mbs] + 1, &uj)); 331 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, mbs, ui, udiag)); /* store matrix factor */ 332 PetscCall(PetscLLDestroy(lnk, lnkbt)); 333 334 /* put together the new matrix in MATSEQSBAIJ format */ 335 PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL)); 336 337 b = (Mat_SeqSBAIJ *)fact->data; 338 b->singlemalloc = PETSC_FALSE; 339 b->free_a = PETSC_TRUE; 340 b->free_ij = PETSC_TRUE; 341 342 PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a)); 343 344 b->j = uj; 345 b->i = ui; 346 b->diag = udiag; 347 b->free_diag = PETSC_TRUE; 348 b->ilen = NULL; 349 b->imax = NULL; 350 b->row = perm; 351 b->icol = perm; 352 353 PetscCall(PetscObjectReference((PetscObject)perm)); 354 PetscCall(PetscObjectReference((PetscObject)perm)); 355 356 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 357 358 PetscCall(PetscMalloc1(mbs + 1, &b->solve_work)); 359 360 b->maxnz = b->nz = ui[mbs]; 361 362 fact->info.factor_mallocs = reallocs; 363 fact->info.fill_ratio_given = fill; 364 if (ai[mbs] != 0) { 365 fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs]; 366 } else { 367 fact->info.fill_ratio_needed = 0.0; 368 } 369 #if defined(PETSC_USE_INFO) 370 if (ai[mbs] != 0) { 371 PetscReal af = fact->info.fill_ratio_needed; 372 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 373 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 374 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 375 } else { 376 PetscCall(PetscInfo(A, "Empty matrix\n")); 377 } 378 #endif 379 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 380 PetscFunctionReturn(PETSC_SUCCESS); 381 } 382 383 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 384 { 385 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 386 Mat_SeqSBAIJ *b; 387 PetscBool perm_identity, missing; 388 PetscReal fill = info->fill; 389 const PetscInt *rip, *ai, *aj; 390 PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow, d; 391 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 392 PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr; 393 PetscFreeSpaceList free_space = NULL, current_space = NULL; 394 PetscBT lnkbt; 395 396 PetscFunctionBegin; 397 PetscCall(MatMissingDiagonal(A, &missing, &d)); 398 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 399 400 /* 401 This code originally uses Modified Sparse Row (MSR) storage 402 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice! 403 Then it is rewritten so the factor B takes seqsbaij format. However the associated 404 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity, 405 thus the original code in MSR format is still used for these cases. 406 The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever 407 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 408 */ 409 if (bs > 1) { 410 PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact, A, perm, info)); 411 PetscFunctionReturn(PETSC_SUCCESS); 412 } 413 414 /* check whether perm is the identity mapping */ 415 PetscCall(ISIdentity(perm, &perm_identity)); 416 PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format"); 417 a->permute = PETSC_FALSE; 418 ai = a->i; 419 aj = a->j; 420 PetscCall(ISGetIndices(perm, &rip)); 421 422 /* initialization */ 423 PetscCall(PetscMalloc1(mbs + 1, &ui)); 424 ui[0] = 0; 425 426 /* jl: linked list for storing indices of the pivot rows 427 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 428 PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols)); 429 for (i = 0; i < mbs; i++) { 430 jl[i] = mbs; 431 il[i] = 0; 432 } 433 434 /* create and initialize a linked list for storing column indices of the active row k */ 435 nlnk = mbs + 1; 436 PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt)); 437 438 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 439 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space)); 440 current_space = free_space; 441 442 for (k = 0; k < mbs; k++) { /* for each active row k */ 443 /* initialize lnk by the column indices of row rip[k] of A */ 444 nzk = 0; 445 ncols = ai[rip[k] + 1] - ai[rip[k]]; 446 for (j = 0; j < ncols; j++) { 447 i = *(aj + ai[rip[k]] + j); 448 cols[j] = rip[i]; 449 } 450 PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt)); 451 nzk += nlnk; 452 453 /* update lnk by computing fill-in for each pivot row to be merged in */ 454 prow = jl[k]; /* 1st pivot row */ 455 456 while (prow < k) { 457 nextprow = jl[prow]; 458 /* merge prow into k-th row */ 459 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 460 jmax = ui[prow + 1]; 461 ncols = jmax - jmin; 462 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 463 PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt)); 464 nzk += nlnk; 465 466 /* update il and jl for prow */ 467 if (jmin < jmax) { 468 il[prow] = jmin; 469 470 j = *uj_ptr; 471 jl[prow] = jl[j]; 472 jl[j] = prow; 473 } 474 prow = nextprow; 475 } 476 477 /* if free space is not available, make more free space */ 478 if (current_space->local_remaining < nzk) { 479 i = mbs - k + 1; /* num of unfactored rows */ 480 i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 481 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 482 reallocs++; 483 } 484 485 /* copy data into free space, then initialize lnk */ 486 PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt)); 487 488 /* add the k-th row into il and jl */ 489 if (nzk - 1 > 0) { 490 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 491 jl[k] = jl[i]; 492 jl[i] = k; 493 il[k] = ui[k] + 1; 494 } 495 ui_ptr[k] = current_space->array; 496 497 current_space->array += nzk; 498 current_space->local_used += nzk; 499 current_space->local_remaining -= nzk; 500 501 ui[k + 1] = ui[k] + nzk; 502 } 503 504 PetscCall(ISRestoreIndices(perm, &rip)); 505 PetscCall(PetscFree4(ui_ptr, il, jl, cols)); 506 507 /* destroy list of free space and other temporary array(s) */ 508 PetscCall(PetscMalloc1(ui[mbs] + 1, &uj)); 509 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 510 PetscCall(PetscLLDestroy(lnk, lnkbt)); 511 512 /* put together the new matrix in MATSEQSBAIJ format */ 513 PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL)); 514 515 b = (Mat_SeqSBAIJ *)fact->data; 516 b->singlemalloc = PETSC_FALSE; 517 b->free_a = PETSC_TRUE; 518 b->free_ij = PETSC_TRUE; 519 520 PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a)); 521 522 b->j = uj; 523 b->i = ui; 524 b->diag = NULL; 525 b->ilen = NULL; 526 b->imax = NULL; 527 b->row = perm; 528 529 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 530 531 PetscCall(PetscObjectReference((PetscObject)perm)); 532 b->icol = perm; 533 PetscCall(PetscObjectReference((PetscObject)perm)); 534 PetscCall(PetscMalloc1(mbs + 1, &b->solve_work)); 535 b->maxnz = b->nz = ui[mbs]; 536 537 fact->info.factor_mallocs = reallocs; 538 fact->info.fill_ratio_given = fill; 539 if (ai[mbs] != 0) { 540 fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs]; 541 } else { 542 fact->info.fill_ratio_needed = 0.0; 543 } 544 #if defined(PETSC_USE_INFO) 545 if (ai[mbs] != 0) { 546 PetscReal af = fact->info.fill_ratio_needed; 547 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 548 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 549 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 550 } else { 551 PetscCall(PetscInfo(A, "Empty matrix\n")); 552 } 553 #endif 554 PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(fact, perm_identity)); 555 PetscFunctionReturn(PETSC_SUCCESS); 556 } 557 558 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C, Mat A, const MatFactorInfo *info) 559 { 560 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 561 IS perm = b->row; 562 const PetscInt *ai, *aj, *perm_ptr, mbs = a->mbs, *bi = b->i, *bj = b->j; 563 PetscInt i, j; 564 PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 565 PetscInt bs = A->rmap->bs, bs2 = a->bs2; 566 MatScalar *ba = b->a, *aa, *ap, *dk, *uik; 567 MatScalar *u, *diag, *rtmp, *rtmp_ptr; 568 MatScalar *work; 569 PetscInt *pivots; 570 PetscBool allowzeropivot, zeropivotdetected; 571 572 PetscFunctionBegin; 573 /* initialization */ 574 PetscCall(PetscCalloc1(bs2 * mbs, &rtmp)); 575 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 576 allowzeropivot = PetscNot(A->erroriffailure); 577 578 il[0] = 0; 579 for (i = 0; i < mbs; i++) jl[i] = mbs; 580 581 PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work)); 582 PetscCall(PetscMalloc1(bs, &pivots)); 583 584 PetscCall(ISGetIndices(perm, &perm_ptr)); 585 586 /* check permutation */ 587 if (!a->permute) { 588 ai = a->i; 589 aj = a->j; 590 aa = a->a; 591 } else { 592 ai = a->inew; 593 aj = a->jnew; 594 PetscCall(PetscMalloc1(bs2 * ai[mbs], &aa)); 595 PetscCall(PetscArraycpy(aa, a->a, bs2 * ai[mbs])); 596 PetscCall(PetscMalloc1(ai[mbs], &a2anew)); 597 PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs])); 598 599 for (i = 0; i < mbs; i++) { 600 jmin = ai[i]; 601 jmax = ai[i + 1]; 602 for (j = jmin; j < jmax; j++) { 603 while (a2anew[j] != j) { 604 k = a2anew[j]; 605 a2anew[j] = a2anew[k]; 606 a2anew[k] = k; 607 for (k1 = 0; k1 < bs2; k1++) { 608 dk[k1] = aa[k * bs2 + k1]; 609 aa[k * bs2 + k1] = aa[j * bs2 + k1]; 610 aa[j * bs2 + k1] = dk[k1]; 611 } 612 } 613 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 614 if (i > aj[j]) { 615 ap = aa + j * bs2; /* ptr to the beginning of j-th block of aa */ 616 for (k = 0; k < bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 617 for (k = 0; k < bs; k++) { /* j-th block of aa <- dk^T */ 618 for (k1 = 0; k1 < bs; k1++) *ap++ = dk[k + bs * k1]; 619 } 620 } 621 } 622 } 623 PetscCall(PetscFree(a2anew)); 624 } 625 626 /* for each row k */ 627 for (k = 0; k < mbs; k++) { 628 /*initialize k-th row with elements nonzero in row perm(k) of A */ 629 jmin = ai[perm_ptr[k]]; 630 jmax = ai[perm_ptr[k] + 1]; 631 632 ap = aa + jmin * bs2; 633 for (j = jmin; j < jmax; j++) { 634 vj = perm_ptr[aj[j]]; /* block col. index */ 635 rtmp_ptr = rtmp + vj * bs2; 636 for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++; 637 } 638 639 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 640 PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2)); 641 i = jl[k]; /* first row to be added to k_th row */ 642 643 while (i < k) { 644 nexti = jl[i]; /* next row to be added to k_th row */ 645 646 /* compute multiplier */ 647 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 648 649 /* uik = -inv(Di)*U_bar(i,k) */ 650 diag = ba + i * bs2; 651 u = ba + ili * bs2; 652 PetscCall(PetscArrayzero(uik, bs2)); 653 PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u); 654 655 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 656 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u); 657 PetscCall(PetscLogFlops(4.0 * bs * bs2)); 658 659 /* update -U(i,k) */ 660 PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2)); 661 662 /* add multiple of row i to k-th row ... */ 663 jmin = ili + 1; 664 jmax = bi[i + 1]; 665 if (jmin < jmax) { 666 for (j = jmin; j < jmax; j++) { 667 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 668 rtmp_ptr = rtmp + bj[j] * bs2; 669 u = ba + j * bs2; 670 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u); 671 } 672 PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin))); 673 674 /* ... add i to row list for next nonzero entry */ 675 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 676 j = bj[jmin]; 677 jl[i] = jl[j]; 678 jl[j] = i; /* update jl */ 679 } 680 i = nexti; 681 } 682 683 /* save nonzero entries in k-th row of U ... */ 684 685 /* invert diagonal block */ 686 diag = ba + k * bs2; 687 PetscCall(PetscArraycpy(diag, dk, bs2)); 688 689 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected)); 690 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 691 692 jmin = bi[k]; 693 jmax = bi[k + 1]; 694 if (jmin < jmax) { 695 for (j = jmin; j < jmax; j++) { 696 vj = bj[j]; /* block col. index of U */ 697 u = ba + j * bs2; 698 rtmp_ptr = rtmp + vj * bs2; 699 for (k1 = 0; k1 < bs2; k1++) { 700 *u++ = *rtmp_ptr; 701 *rtmp_ptr++ = 0.0; 702 } 703 } 704 705 /* ... add k to row list for first nonzero entry in k-th row */ 706 il[k] = jmin; 707 i = bj[jmin]; 708 jl[k] = jl[i]; 709 jl[i] = k; 710 } 711 } 712 713 PetscCall(PetscFree(rtmp)); 714 PetscCall(PetscFree2(il, jl)); 715 PetscCall(PetscFree3(dk, uik, work)); 716 PetscCall(PetscFree(pivots)); 717 if (a->permute) PetscCall(PetscFree(aa)); 718 719 PetscCall(ISRestoreIndices(perm, &perm_ptr)); 720 721 C->ops->solve = MatSolve_SeqSBAIJ_N_inplace; 722 C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace; 723 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_inplace; 724 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_inplace; 725 726 C->assembled = PETSC_TRUE; 727 C->preallocated = PETSC_TRUE; 728 729 PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */ 730 PetscFunctionReturn(PETSC_SUCCESS); 731 } 732 733 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info) 734 { 735 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 736 PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j; 737 PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 738 PetscInt bs = A->rmap->bs, bs2 = a->bs2; 739 MatScalar *ba = b->a, *aa, *ap, *dk, *uik; 740 MatScalar *u, *diag, *rtmp, *rtmp_ptr; 741 MatScalar *work; 742 PetscInt *pivots; 743 PetscBool allowzeropivot, zeropivotdetected; 744 745 PetscFunctionBegin; 746 PetscCall(PetscCalloc1(bs2 * mbs, &rtmp)); 747 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 748 il[0] = 0; 749 for (i = 0; i < mbs; i++) jl[i] = mbs; 750 751 PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work)); 752 PetscCall(PetscMalloc1(bs, &pivots)); 753 allowzeropivot = PetscNot(A->erroriffailure); 754 755 ai = a->i; 756 aj = a->j; 757 aa = a->a; 758 759 /* for each row k */ 760 for (k = 0; k < mbs; k++) { 761 /*initialize k-th row with elements nonzero in row k of A */ 762 jmin = ai[k]; 763 jmax = ai[k + 1]; 764 ap = aa + jmin * bs2; 765 for (j = jmin; j < jmax; j++) { 766 vj = aj[j]; /* block col. index */ 767 rtmp_ptr = rtmp + vj * bs2; 768 for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++; 769 } 770 771 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 772 PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2)); 773 i = jl[k]; /* first row to be added to k_th row */ 774 775 while (i < k) { 776 nexti = jl[i]; /* next row to be added to k_th row */ 777 778 /* compute multiplier */ 779 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 780 781 /* uik = -inv(Di)*U_bar(i,k) */ 782 diag = ba + i * bs2; 783 u = ba + ili * bs2; 784 PetscCall(PetscArrayzero(uik, bs2)); 785 PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u); 786 787 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 788 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u); 789 PetscCall(PetscLogFlops(2.0 * bs * bs2)); 790 791 /* update -U(i,k) */ 792 PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2)); 793 794 /* add multiple of row i to k-th row ... */ 795 jmin = ili + 1; 796 jmax = bi[i + 1]; 797 if (jmin < jmax) { 798 for (j = jmin; j < jmax; j++) { 799 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 800 rtmp_ptr = rtmp + bj[j] * bs2; 801 u = ba + j * bs2; 802 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u); 803 } 804 PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin))); 805 806 /* ... add i to row list for next nonzero entry */ 807 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 808 j = bj[jmin]; 809 jl[i] = jl[j]; 810 jl[j] = i; /* update jl */ 811 } 812 i = nexti; 813 } 814 815 /* save nonzero entries in k-th row of U ... */ 816 817 /* invert diagonal block */ 818 diag = ba + k * bs2; 819 PetscCall(PetscArraycpy(diag, dk, bs2)); 820 821 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected)); 822 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 823 824 jmin = bi[k]; 825 jmax = bi[k + 1]; 826 if (jmin < jmax) { 827 for (j = jmin; j < jmax; j++) { 828 vj = bj[j]; /* block col. index of U */ 829 u = ba + j * bs2; 830 rtmp_ptr = rtmp + vj * bs2; 831 for (k1 = 0; k1 < bs2; k1++) { 832 *u++ = *rtmp_ptr; 833 *rtmp_ptr++ = 0.0; 834 } 835 } 836 837 /* ... add k to row list for first nonzero entry in k-th row */ 838 il[k] = jmin; 839 i = bj[jmin]; 840 jl[k] = jl[i]; 841 jl[i] = k; 842 } 843 } 844 845 PetscCall(PetscFree(rtmp)); 846 PetscCall(PetscFree2(il, jl)); 847 PetscCall(PetscFree3(dk, uik, work)); 848 PetscCall(PetscFree(pivots)); 849 850 C->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 851 C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 852 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 853 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 854 C->assembled = PETSC_TRUE; 855 C->preallocated = PETSC_TRUE; 856 857 PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */ 858 PetscFunctionReturn(PETSC_SUCCESS); 859 } 860 861 /* 862 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 863 Version for blocks 2 by 2. 864 */ 865 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C, Mat A, const MatFactorInfo *info) 866 { 867 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 868 IS perm = b->row; 869 const PetscInt *ai, *aj, *perm_ptr; 870 PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j; 871 PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 872 MatScalar *ba = b->a, *aa, *ap; 873 MatScalar *u, *diag, *rtmp, *rtmp_ptr, dk[4], uik[4]; 874 PetscReal shift = info->shiftamount; 875 PetscBool allowzeropivot, zeropivotdetected; 876 877 PetscFunctionBegin; 878 allowzeropivot = PetscNot(A->erroriffailure); 879 880 /* initialization */ 881 /* il and jl record the first nonzero element in each row of the accessing 882 window U(0:k, k:mbs-1). 883 jl: list of rows to be added to uneliminated rows 884 i>= k: jl(i) is the first row to be added to row i 885 i< k: jl(i) is the row following row i in some list of rows 886 jl(i) = mbs indicates the end of a list 887 il(i): points to the first nonzero element in columns k,...,mbs-1 of 888 row i of U */ 889 PetscCall(PetscCalloc1(4 * mbs, &rtmp)); 890 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 891 il[0] = 0; 892 for (i = 0; i < mbs; i++) jl[i] = mbs; 893 894 PetscCall(ISGetIndices(perm, &perm_ptr)); 895 896 /* check permutation */ 897 if (!a->permute) { 898 ai = a->i; 899 aj = a->j; 900 aa = a->a; 901 } else { 902 ai = a->inew; 903 aj = a->jnew; 904 PetscCall(PetscMalloc1(4 * ai[mbs], &aa)); 905 PetscCall(PetscArraycpy(aa, a->a, 4 * ai[mbs])); 906 PetscCall(PetscMalloc1(ai[mbs], &a2anew)); 907 PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs])); 908 909 for (i = 0; i < mbs; i++) { 910 jmin = ai[i]; 911 jmax = ai[i + 1]; 912 for (j = jmin; j < jmax; j++) { 913 while (a2anew[j] != j) { 914 k = a2anew[j]; 915 a2anew[j] = a2anew[k]; 916 a2anew[k] = k; 917 for (k1 = 0; k1 < 4; k1++) { 918 dk[k1] = aa[k * 4 + k1]; 919 aa[k * 4 + k1] = aa[j * 4 + k1]; 920 aa[j * 4 + k1] = dk[k1]; 921 } 922 } 923 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 924 if (i > aj[j]) { 925 ap = aa + j * 4; /* ptr to the beginning of the block */ 926 dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 927 ap[1] = ap[2]; 928 ap[2] = dk[1]; 929 } 930 } 931 } 932 PetscCall(PetscFree(a2anew)); 933 } 934 935 /* for each row k */ 936 for (k = 0; k < mbs; k++) { 937 /*initialize k-th row with elements nonzero in row perm(k) of A */ 938 jmin = ai[perm_ptr[k]]; 939 jmax = ai[perm_ptr[k] + 1]; 940 ap = aa + jmin * 4; 941 for (j = jmin; j < jmax; j++) { 942 vj = perm_ptr[aj[j]]; /* block col. index */ 943 rtmp_ptr = rtmp + vj * 4; 944 for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++; 945 } 946 947 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 948 PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4)); 949 i = jl[k]; /* first row to be added to k_th row */ 950 951 while (i < k) { 952 nexti = jl[i]; /* next row to be added to k_th row */ 953 954 /* compute multiplier */ 955 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 956 957 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 958 diag = ba + i * 4; 959 u = ba + ili * 4; 960 uik[0] = -(diag[0] * u[0] + diag[2] * u[1]); 961 uik[1] = -(diag[1] * u[0] + diag[3] * u[1]); 962 uik[2] = -(diag[0] * u[2] + diag[2] * u[3]); 963 uik[3] = -(diag[1] * u[2] + diag[3] * u[3]); 964 965 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 966 dk[0] += uik[0] * u[0] + uik[1] * u[1]; 967 dk[1] += uik[2] * u[0] + uik[3] * u[1]; 968 dk[2] += uik[0] * u[2] + uik[1] * u[3]; 969 dk[3] += uik[2] * u[2] + uik[3] * u[3]; 970 971 PetscCall(PetscLogFlops(16.0 * 2.0)); 972 973 /* update -U(i,k): ba[ili] = uik */ 974 PetscCall(PetscArraycpy(ba + ili * 4, uik, 4)); 975 976 /* add multiple of row i to k-th row ... */ 977 jmin = ili + 1; 978 jmax = bi[i + 1]; 979 if (jmin < jmax) { 980 for (j = jmin; j < jmax; j++) { 981 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 982 rtmp_ptr = rtmp + bj[j] * 4; 983 u = ba + j * 4; 984 rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1]; 985 rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1]; 986 rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3]; 987 rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3]; 988 } 989 PetscCall(PetscLogFlops(16.0 * (jmax - jmin))); 990 991 /* ... add i to row list for next nonzero entry */ 992 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 993 j = bj[jmin]; 994 jl[i] = jl[j]; 995 jl[j] = i; /* update jl */ 996 } 997 i = nexti; 998 } 999 1000 /* save nonzero entries in k-th row of U ... */ 1001 1002 /* invert diagonal block */ 1003 diag = ba + k * 4; 1004 PetscCall(PetscArraycpy(diag, dk, 4)); 1005 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 1006 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1007 1008 jmin = bi[k]; 1009 jmax = bi[k + 1]; 1010 if (jmin < jmax) { 1011 for (j = jmin; j < jmax; j++) { 1012 vj = bj[j]; /* block col. index of U */ 1013 u = ba + j * 4; 1014 rtmp_ptr = rtmp + vj * 4; 1015 for (k1 = 0; k1 < 4; k1++) { 1016 *u++ = *rtmp_ptr; 1017 *rtmp_ptr++ = 0.0; 1018 } 1019 } 1020 1021 /* ... add k to row list for first nonzero entry in k-th row */ 1022 il[k] = jmin; 1023 i = bj[jmin]; 1024 jl[k] = jl[i]; 1025 jl[i] = k; 1026 } 1027 } 1028 1029 PetscCall(PetscFree(rtmp)); 1030 PetscCall(PetscFree2(il, jl)); 1031 if (a->permute) PetscCall(PetscFree(aa)); 1032 PetscCall(ISRestoreIndices(perm, &perm_ptr)); 1033 1034 C->ops->solve = MatSolve_SeqSBAIJ_2_inplace; 1035 C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace; 1036 C->assembled = PETSC_TRUE; 1037 C->preallocated = PETSC_TRUE; 1038 1039 PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */ 1040 PetscFunctionReturn(PETSC_SUCCESS); 1041 } 1042 1043 /* 1044 Version for when blocks are 2 by 2 Using natural ordering 1045 */ 1046 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info) 1047 { 1048 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 1049 PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j; 1050 PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 1051 MatScalar *ba = b->a, *aa, *ap, dk[8], uik[8]; 1052 MatScalar *u, *diag, *rtmp, *rtmp_ptr; 1053 PetscReal shift = info->shiftamount; 1054 PetscBool allowzeropivot, zeropivotdetected; 1055 1056 PetscFunctionBegin; 1057 allowzeropivot = PetscNot(A->erroriffailure); 1058 1059 /* initialization */ 1060 /* il and jl record the first nonzero element in each row of the accessing 1061 window U(0:k, k:mbs-1). 1062 jl: list of rows to be added to uneliminated rows 1063 i>= k: jl(i) is the first row to be added to row i 1064 i< k: jl(i) is the row following row i in some list of rows 1065 jl(i) = mbs indicates the end of a list 1066 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1067 row i of U */ 1068 PetscCall(PetscCalloc1(4 * mbs, &rtmp)); 1069 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 1070 il[0] = 0; 1071 for (i = 0; i < mbs; i++) jl[i] = mbs; 1072 1073 ai = a->i; 1074 aj = a->j; 1075 aa = a->a; 1076 1077 /* for each row k */ 1078 for (k = 0; k < mbs; k++) { 1079 /*initialize k-th row with elements nonzero in row k of A */ 1080 jmin = ai[k]; 1081 jmax = ai[k + 1]; 1082 ap = aa + jmin * 4; 1083 for (j = jmin; j < jmax; j++) { 1084 vj = aj[j]; /* block col. index */ 1085 rtmp_ptr = rtmp + vj * 4; 1086 for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++; 1087 } 1088 1089 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1090 PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4)); 1091 i = jl[k]; /* first row to be added to k_th row */ 1092 1093 while (i < k) { 1094 nexti = jl[i]; /* next row to be added to k_th row */ 1095 1096 /* compute multiplier */ 1097 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1098 1099 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 1100 diag = ba + i * 4; 1101 u = ba + ili * 4; 1102 uik[0] = -(diag[0] * u[0] + diag[2] * u[1]); 1103 uik[1] = -(diag[1] * u[0] + diag[3] * u[1]); 1104 uik[2] = -(diag[0] * u[2] + diag[2] * u[3]); 1105 uik[3] = -(diag[1] * u[2] + diag[3] * u[3]); 1106 1107 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 1108 dk[0] += uik[0] * u[0] + uik[1] * u[1]; 1109 dk[1] += uik[2] * u[0] + uik[3] * u[1]; 1110 dk[2] += uik[0] * u[2] + uik[1] * u[3]; 1111 dk[3] += uik[2] * u[2] + uik[3] * u[3]; 1112 1113 PetscCall(PetscLogFlops(16.0 * 2.0)); 1114 1115 /* update -U(i,k): ba[ili] = uik */ 1116 PetscCall(PetscArraycpy(ba + ili * 4, uik, 4)); 1117 1118 /* add multiple of row i to k-th row ... */ 1119 jmin = ili + 1; 1120 jmax = bi[i + 1]; 1121 if (jmin < jmax) { 1122 for (j = jmin; j < jmax; j++) { 1123 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 1124 rtmp_ptr = rtmp + bj[j] * 4; 1125 u = ba + j * 4; 1126 rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1]; 1127 rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1]; 1128 rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3]; 1129 rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3]; 1130 } 1131 PetscCall(PetscLogFlops(16.0 * (jmax - jmin))); 1132 1133 /* ... add i to row list for next nonzero entry */ 1134 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1135 j = bj[jmin]; 1136 jl[i] = jl[j]; 1137 jl[j] = i; /* update jl */ 1138 } 1139 i = nexti; 1140 } 1141 1142 /* save nonzero entries in k-th row of U ... */ 1143 1144 /* invert diagonal block */ 1145 diag = ba + k * 4; 1146 PetscCall(PetscArraycpy(diag, dk, 4)); 1147 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 1148 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1149 1150 jmin = bi[k]; 1151 jmax = bi[k + 1]; 1152 if (jmin < jmax) { 1153 for (j = jmin; j < jmax; j++) { 1154 vj = bj[j]; /* block col. index of U */ 1155 u = ba + j * 4; 1156 rtmp_ptr = rtmp + vj * 4; 1157 for (k1 = 0; k1 < 4; k1++) { 1158 *u++ = *rtmp_ptr; 1159 *rtmp_ptr++ = 0.0; 1160 } 1161 } 1162 1163 /* ... add k to row list for first nonzero entry in k-th row */ 1164 il[k] = jmin; 1165 i = bj[jmin]; 1166 jl[k] = jl[i]; 1167 jl[i] = k; 1168 } 1169 } 1170 1171 PetscCall(PetscFree(rtmp)); 1172 PetscCall(PetscFree2(il, jl)); 1173 1174 C->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1175 C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1176 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1177 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1178 C->assembled = PETSC_TRUE; 1179 C->preallocated = PETSC_TRUE; 1180 1181 PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */ 1182 PetscFunctionReturn(PETSC_SUCCESS); 1183 } 1184 1185 /* 1186 Numeric U^T*D*U factorization for SBAIJ format. 1187 Version for blocks are 1 by 1. 1188 */ 1189 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info) 1190 { 1191 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 1192 IS ip = b->row; 1193 const PetscInt *ai, *aj, *rip; 1194 PetscInt *a2anew, i, j, mbs = a->mbs, *bi = b->i, *bj = b->j, *bcol; 1195 PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 1196 MatScalar *rtmp, *ba = b->a, *bval, *aa, dk, uikdi; 1197 PetscReal rs; 1198 FactorShiftCtx sctx; 1199 1200 PetscFunctionBegin; 1201 /* MatPivotSetUp(): initialize shift context sctx */ 1202 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1203 1204 PetscCall(ISGetIndices(ip, &rip)); 1205 if (!a->permute) { 1206 ai = a->i; 1207 aj = a->j; 1208 aa = a->a; 1209 } else { 1210 ai = a->inew; 1211 aj = a->jnew; 1212 nz = ai[mbs]; 1213 PetscCall(PetscMalloc1(nz, &aa)); 1214 a2anew = a->a2anew; 1215 bval = a->a; 1216 for (j = 0; j < nz; j++) aa[a2anew[j]] = *(bval++); 1217 } 1218 1219 /* initialization */ 1220 /* il and jl record the first nonzero element in each row of the accessing 1221 window U(0:k, k:mbs-1). 1222 jl: list of rows to be added to uneliminated rows 1223 i>= k: jl(i) is the first row to be added to row i 1224 i< k: jl(i) is the row following row i in some list of rows 1225 jl(i) = mbs indicates the end of a list 1226 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1227 row i of U */ 1228 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 1229 1230 do { 1231 sctx.newshift = PETSC_FALSE; 1232 il[0] = 0; 1233 for (i = 0; i < mbs; i++) { 1234 rtmp[i] = 0.0; 1235 jl[i] = mbs; 1236 } 1237 1238 for (k = 0; k < mbs; k++) { 1239 /*initialize k-th row by the perm[k]-th row of A */ 1240 jmin = ai[rip[k]]; 1241 jmax = ai[rip[k] + 1]; 1242 bval = ba + bi[k]; 1243 for (j = jmin; j < jmax; j++) { 1244 col = rip[aj[j]]; 1245 rtmp[col] = aa[j]; 1246 *bval++ = 0.0; /* for in-place factorization */ 1247 } 1248 1249 /* shift the diagonal of the matrix */ 1250 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1251 1252 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1253 dk = rtmp[k]; 1254 i = jl[k]; /* first row to be added to k_th row */ 1255 1256 while (i < k) { 1257 nexti = jl[i]; /* next row to be added to k_th row */ 1258 1259 /* compute multiplier, update diag(k) and U(i,k) */ 1260 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1261 uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 1262 dk += uikdi * ba[ili]; 1263 ba[ili] = uikdi; /* -U(i,k) */ 1264 1265 /* add multiple of row i to k-th row */ 1266 jmin = ili + 1; 1267 jmax = bi[i + 1]; 1268 if (jmin < jmax) { 1269 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 1270 PetscCall(PetscLogFlops(2.0 * (jmax - jmin))); 1271 1272 /* update il and jl for row i */ 1273 il[i] = jmin; 1274 j = bj[jmin]; 1275 jl[i] = jl[j]; 1276 jl[j] = i; 1277 } 1278 i = nexti; 1279 } 1280 1281 /* shift the diagonals when zero pivot is detected */ 1282 /* compute rs=sum of abs(off-diagonal) */ 1283 rs = 0.0; 1284 jmin = bi[k] + 1; 1285 nz = bi[k + 1] - jmin; 1286 if (nz) { 1287 bcol = bj + jmin; 1288 while (nz--) { 1289 rs += PetscAbsScalar(rtmp[*bcol]); 1290 bcol++; 1291 } 1292 } 1293 1294 sctx.rs = rs; 1295 sctx.pv = dk; 1296 PetscCall(MatPivotCheck(C, A, info, &sctx, k)); 1297 if (sctx.newshift) break; /* sctx.shift_amount is updated */ 1298 dk = sctx.pv; 1299 1300 /* copy data into U(k,:) */ 1301 ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 1302 jmin = bi[k] + 1; 1303 jmax = bi[k + 1]; 1304 if (jmin < jmax) { 1305 for (j = jmin; j < jmax; j++) { 1306 col = bj[j]; 1307 ba[j] = rtmp[col]; 1308 rtmp[col] = 0.0; 1309 } 1310 /* add the k-th row into il and jl */ 1311 il[k] = jmin; 1312 i = bj[jmin]; 1313 jl[k] = jl[i]; 1314 jl[i] = k; 1315 } 1316 } 1317 } while (sctx.newshift); 1318 PetscCall(PetscFree3(rtmp, il, jl)); 1319 if (a->permute) PetscCall(PetscFree(aa)); 1320 1321 PetscCall(ISRestoreIndices(ip, &rip)); 1322 1323 C->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 1324 C->ops->solves = MatSolves_SeqSBAIJ_1_inplace; 1325 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 1326 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 1327 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 1328 C->assembled = PETSC_TRUE; 1329 C->preallocated = PETSC_TRUE; 1330 1331 PetscCall(PetscLogFlops(C->rmap->N)); 1332 if (sctx.nshift) { 1333 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1334 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1335 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1336 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1337 } 1338 } 1339 PetscFunctionReturn(PETSC_SUCCESS); 1340 } 1341 1342 /* 1343 Version for when blocks are 1 by 1 Using natural ordering under new datastructure 1344 Modified from MatCholeskyFactorNumeric_SeqAIJ() 1345 */ 1346 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info) 1347 { 1348 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1349 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)B->data; 1350 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp; 1351 PetscInt *ai = a->i, *aj = a->j, *ajtmp; 1352 PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz; 1353 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 1354 FactorShiftCtx sctx; 1355 PetscReal rs; 1356 MatScalar d, *v; 1357 1358 PetscFunctionBegin; 1359 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r)); 1360 1361 /* MatPivotSetUp(): initialize shift context sctx */ 1362 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1363 1364 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 1365 sctx.shift_top = info->zeropivot; 1366 1367 PetscCall(PetscArrayzero(rtmp, mbs)); 1368 1369 for (i = 0; i < mbs; i++) { 1370 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 1371 d = (aa)[a->diag[i]]; 1372 rtmp[i] += -PetscRealPart(d); /* diagonal entry */ 1373 ajtmp = aj + ai[i] + 1; /* exclude diagonal */ 1374 v = aa + ai[i] + 1; 1375 nz = ai[i + 1] - ai[i] - 1; 1376 for (j = 0; j < nz; j++) { 1377 rtmp[i] += PetscAbsScalar(v[j]); 1378 rtmp[ajtmp[j]] += PetscAbsScalar(v[j]); 1379 } 1380 if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]); 1381 } 1382 sctx.shift_top *= 1.1; 1383 sctx.nshift_max = 5; 1384 sctx.shift_lo = 0.; 1385 sctx.shift_hi = 1.; 1386 } 1387 1388 /* allocate working arrays 1389 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 1390 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 1391 */ 1392 do { 1393 sctx.newshift = PETSC_FALSE; 1394 1395 for (i = 0; i < mbs; i++) c2r[i] = mbs; 1396 if (mbs) il[0] = 0; 1397 1398 for (k = 0; k < mbs; k++) { 1399 /* zero rtmp */ 1400 nz = bi[k + 1] - bi[k]; 1401 bjtmp = bj + bi[k]; 1402 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 1403 1404 /* load in initial unfactored row */ 1405 bval = ba + bi[k]; 1406 jmin = ai[k]; 1407 jmax = ai[k + 1]; 1408 for (j = jmin; j < jmax; j++) { 1409 col = aj[j]; 1410 rtmp[col] = aa[j]; 1411 *bval++ = 0.0; /* for in-place factorization */ 1412 } 1413 /* shift the diagonal of the matrix: ZeropivotApply() */ 1414 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 1415 1416 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1417 dk = rtmp[k]; 1418 i = c2r[k]; /* first row to be added to k_th row */ 1419 1420 while (i < k) { 1421 nexti = c2r[i]; /* next row to be added to k_th row */ 1422 1423 /* compute multiplier, update diag(k) and U(i,k) */ 1424 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1425 uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */ 1426 dk += uikdi * ba[ili]; /* update diag[k] */ 1427 ba[ili] = uikdi; /* -U(i,k) */ 1428 1429 /* add multiple of row i to k-th row */ 1430 jmin = ili + 1; 1431 jmax = bi[i + 1]; 1432 if (jmin < jmax) { 1433 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 1434 /* update il and c2r for row i */ 1435 il[i] = jmin; 1436 j = bj[jmin]; 1437 c2r[i] = c2r[j]; 1438 c2r[j] = i; 1439 } 1440 i = nexti; 1441 } 1442 1443 /* copy data into U(k,:) */ 1444 rs = 0.0; 1445 jmin = bi[k]; 1446 jmax = bi[k + 1] - 1; 1447 if (jmin < jmax) { 1448 for (j = jmin; j < jmax; j++) { 1449 col = bj[j]; 1450 ba[j] = rtmp[col]; 1451 rs += PetscAbsScalar(ba[j]); 1452 } 1453 /* add the k-th row into il and c2r */ 1454 il[k] = jmin; 1455 i = bj[jmin]; 1456 c2r[k] = c2r[i]; 1457 c2r[i] = k; 1458 } 1459 1460 sctx.rs = rs; 1461 sctx.pv = dk; 1462 PetscCall(MatPivotCheck(B, A, info, &sctx, k)); 1463 if (sctx.newshift) break; 1464 dk = sctx.pv; 1465 1466 ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */ 1467 } 1468 } while (sctx.newshift); 1469 1470 PetscCall(PetscFree3(rtmp, il, c2r)); 1471 1472 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1473 B->ops->solves = MatSolves_SeqSBAIJ_1; 1474 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1475 B->ops->matsolve = MatMatSolve_SeqSBAIJ_1_NaturalOrdering; 1476 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1477 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1478 1479 B->assembled = PETSC_TRUE; 1480 B->preallocated = PETSC_TRUE; 1481 1482 PetscCall(PetscLogFlops(B->rmap->n)); 1483 1484 /* MatPivotView() */ 1485 if (sctx.nshift) { 1486 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1487 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 1488 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1489 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1490 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 1491 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 1492 } 1493 } 1494 PetscFunctionReturn(PETSC_SUCCESS); 1495 } 1496 1497 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info) 1498 { 1499 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 1500 PetscInt i, j, mbs = a->mbs; 1501 PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 1502 PetscInt k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz; 1503 MatScalar *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval; 1504 PetscReal rs; 1505 FactorShiftCtx sctx; 1506 1507 PetscFunctionBegin; 1508 /* MatPivotSetUp(): initialize shift context sctx */ 1509 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1510 1511 /* initialization */ 1512 /* il and jl record the first nonzero element in each row of the accessing 1513 window U(0:k, k:mbs-1). 1514 jl: list of rows to be added to uneliminated rows 1515 i>= k: jl(i) is the first row to be added to row i 1516 i< k: jl(i) is the row following row i in some list of rows 1517 jl(i) = mbs indicates the end of a list 1518 il(i): points to the first nonzero element in U(i,k:mbs-1) 1519 */ 1520 PetscCall(PetscMalloc1(mbs, &rtmp)); 1521 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 1522 1523 do { 1524 sctx.newshift = PETSC_FALSE; 1525 il[0] = 0; 1526 for (i = 0; i < mbs; i++) { 1527 rtmp[i] = 0.0; 1528 jl[i] = mbs; 1529 } 1530 1531 for (k = 0; k < mbs; k++) { 1532 /*initialize k-th row with elements nonzero in row perm(k) of A */ 1533 nz = ai[k + 1] - ai[k]; 1534 acol = aj + ai[k]; 1535 aval = aa + ai[k]; 1536 bval = ba + bi[k]; 1537 while (nz--) { 1538 rtmp[*acol++] = *aval++; 1539 *bval++ = 0.0; /* for in-place factorization */ 1540 } 1541 1542 /* shift the diagonal of the matrix */ 1543 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1544 1545 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1546 dk = rtmp[k]; 1547 i = jl[k]; /* first row to be added to k_th row */ 1548 1549 while (i < k) { 1550 nexti = jl[i]; /* next row to be added to k_th row */ 1551 /* compute multiplier, update D(k) and U(i,k) */ 1552 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1553 uikdi = -ba[ili] * ba[bi[i]]; 1554 dk += uikdi * ba[ili]; 1555 ba[ili] = uikdi; /* -U(i,k) */ 1556 1557 /* add multiple of row i to k-th row ... */ 1558 jmin = ili + 1; 1559 nz = bi[i + 1] - jmin; 1560 if (nz > 0) { 1561 bcol = bj + jmin; 1562 bval = ba + jmin; 1563 PetscCall(PetscLogFlops(2.0 * nz)); 1564 while (nz--) rtmp[*bcol++] += uikdi * (*bval++); 1565 1566 /* update il and jl for i-th row */ 1567 il[i] = jmin; 1568 j = bj[jmin]; 1569 jl[i] = jl[j]; 1570 jl[j] = i; 1571 } 1572 i = nexti; 1573 } 1574 1575 /* shift the diagonals when zero pivot is detected */ 1576 /* compute rs=sum of abs(off-diagonal) */ 1577 rs = 0.0; 1578 jmin = bi[k] + 1; 1579 nz = bi[k + 1] - jmin; 1580 if (nz) { 1581 bcol = bj + jmin; 1582 while (nz--) { 1583 rs += PetscAbsScalar(rtmp[*bcol]); 1584 bcol++; 1585 } 1586 } 1587 1588 sctx.rs = rs; 1589 sctx.pv = dk; 1590 PetscCall(MatPivotCheck(C, A, info, &sctx, k)); 1591 if (sctx.newshift) break; /* sctx.shift_amount is updated */ 1592 dk = sctx.pv; 1593 1594 /* copy data into U(k,:) */ 1595 ba[bi[k]] = 1.0 / dk; 1596 jmin = bi[k] + 1; 1597 nz = bi[k + 1] - jmin; 1598 if (nz) { 1599 bcol = bj + jmin; 1600 bval = ba + jmin; 1601 while (nz--) { 1602 *bval++ = rtmp[*bcol]; 1603 rtmp[*bcol++] = 0.0; 1604 } 1605 /* add k-th row into il and jl */ 1606 il[k] = jmin; 1607 i = bj[jmin]; 1608 jl[k] = jl[i]; 1609 jl[i] = k; 1610 } 1611 } /* end of for (k = 0; k<mbs; k++) */ 1612 } while (sctx.newshift); 1613 PetscCall(PetscFree(rtmp)); 1614 PetscCall(PetscFree2(il, jl)); 1615 1616 C->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1617 C->ops->solves = MatSolves_SeqSBAIJ_1_inplace; 1618 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1619 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1620 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1621 1622 C->assembled = PETSC_TRUE; 1623 C->preallocated = PETSC_TRUE; 1624 1625 PetscCall(PetscLogFlops(C->rmap->N)); 1626 if (sctx.nshift) { 1627 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1628 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1629 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1630 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1631 } 1632 } 1633 PetscFunctionReturn(PETSC_SUCCESS); 1634 } 1635 1636 PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A, IS perm, const MatFactorInfo *info) 1637 { 1638 Mat C; 1639 1640 PetscFunctionBegin; 1641 PetscCall(MatGetFactor(A, "petsc", MAT_FACTOR_CHOLESKY, &C)); 1642 PetscCall(MatCholeskyFactorSymbolic(C, A, perm, info)); 1643 PetscCall(MatCholeskyFactorNumeric(C, A, info)); 1644 1645 A->ops->solve = C->ops->solve; 1646 A->ops->solvetranspose = C->ops->solvetranspose; 1647 1648 PetscCall(MatHeaderMerge(A, &C)); 1649 PetscFunctionReturn(PETSC_SUCCESS); 1650 } 1651