1 2 #include <../src/mat/impls/baij/seq/baij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petsc/private/kernels/blockinvert.h> 5 #include <petscis.h> 6 7 PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) { 8 Mat_SeqSBAIJ *fact = (Mat_SeqSBAIJ *)F->data; 9 MatScalar *dd = fact->a; 10 PetscInt mbs = fact->mbs, bs = F->rmap->bs, i, nneg_tmp, npos_tmp, *fi = fact->diag; 11 12 PetscFunctionBegin; 13 PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for bs: %" PetscInt_FMT " >1 yet", bs); 14 15 nneg_tmp = 0; 16 npos_tmp = 0; 17 for (i = 0; i < mbs; i++) { 18 if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++; 19 else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++; 20 fi++; 21 } 22 if (nneg) *nneg = nneg_tmp; 23 if (npos) *npos = npos_tmp; 24 if (nzero) *nzero = mbs - nneg_tmp - npos_tmp; 25 PetscFunctionReturn(0); 26 } 27 28 /* 29 Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 30 Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad. 31 */ 32 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F, Mat A, IS perm, const MatFactorInfo *info) { 33 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b; 34 const PetscInt *rip, *ai, *aj; 35 PetscInt i, mbs = a->mbs, *jutmp, bs = A->rmap->bs, bs2 = a->bs2; 36 PetscInt m, reallocs = 0, prow; 37 PetscInt *jl, *q, jmin, jmax, juidx, nzk, qm, *iu, *ju, k, j, vj, umax, maxadd; 38 PetscReal f = info->fill; 39 PetscBool perm_identity; 40 41 PetscFunctionBegin; 42 /* check whether perm is the identity mapping */ 43 PetscCall(ISIdentity(perm, &perm_identity)); 44 PetscCall(ISGetIndices(perm, &rip)); 45 46 if (perm_identity) { /* without permutation */ 47 a->permute = PETSC_FALSE; 48 49 ai = a->i; 50 aj = a->j; 51 } else { /* non-trivial permutation */ 52 a->permute = PETSC_TRUE; 53 54 PetscCall(MatReorderingSeqSBAIJ(A, perm)); 55 56 ai = a->inew; 57 aj = a->jnew; 58 } 59 60 /* initialization */ 61 PetscCall(PetscMalloc1(mbs + 1, &iu)); 62 umax = (PetscInt)(f * ai[mbs] + 1); 63 umax += mbs + 1; 64 PetscCall(PetscMalloc1(umax, &ju)); 65 iu[0] = mbs + 1; 66 juidx = mbs + 1; /* index for ju */ 67 /* jl linked list for pivot row -- linked list for col index */ 68 PetscCall(PetscMalloc2(mbs, &jl, mbs, &q)); 69 for (i = 0; i < mbs; i++) { 70 jl[i] = mbs; 71 q[i] = 0; 72 } 73 74 /* for each row k */ 75 for (k = 0; k < mbs; k++) { 76 for (i = 0; i < mbs; i++) q[i] = 0; /* to be removed! */ 77 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 78 q[k] = mbs; 79 /* initialize nonzero structure of k-th row to row rip[k] of A */ 80 jmin = ai[rip[k]] + 1; /* exclude diag[k] */ 81 jmax = ai[rip[k] + 1]; 82 for (j = jmin; j < jmax; j++) { 83 vj = rip[aj[j]]; /* col. value */ 84 if (vj > k) { 85 qm = k; 86 do { 87 m = qm; 88 qm = q[m]; 89 } while (qm < vj); 90 PetscCheck(qm != vj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Duplicate entry in A"); 91 nzk++; 92 q[m] = vj; 93 q[vj] = qm; 94 } /* if (vj > k) */ 95 } /* for (j=jmin; j<jmax; j++) */ 96 97 /* modify nonzero structure of k-th row by computing fill-in 98 for each row i to be merged in */ 99 prow = k; 100 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 101 102 while (prow < k) { 103 /* merge row prow into k-th row */ 104 jmin = iu[prow] + 1; 105 jmax = iu[prow + 1]; 106 qm = k; 107 for (j = jmin; j < jmax; j++) { 108 vj = ju[j]; 109 do { 110 m = qm; 111 qm = q[m]; 112 } while (qm < vj); 113 if (qm != vj) { 114 nzk++; 115 q[m] = vj; 116 q[vj] = qm; 117 qm = vj; 118 } 119 } 120 prow = jl[prow]; /* next pivot row */ 121 } 122 123 /* add k to row list for first nonzero element in k-th row */ 124 if (nzk > 0) { 125 i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 126 jl[k] = jl[i]; 127 jl[i] = k; 128 } 129 iu[k + 1] = iu[k] + nzk; 130 131 /* allocate more space to ju if needed */ 132 if (iu[k + 1] > umax) { 133 /* estimate how much additional space we will need */ 134 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 135 /* just double the memory each time */ 136 maxadd = umax; 137 if (maxadd < nzk) maxadd = (mbs - k) * (nzk + 1) / 2; 138 umax += maxadd; 139 140 /* allocate a longer ju */ 141 PetscCall(PetscMalloc1(umax, &jutmp)); 142 PetscCall(PetscArraycpy(jutmp, ju, iu[k])); 143 PetscCall(PetscFree(ju)); 144 ju = jutmp; 145 reallocs++; /* count how many times we realloc */ 146 } 147 148 /* save nonzero structure of k-th row in ju */ 149 i = k; 150 while (nzk--) { 151 i = q[i]; 152 ju[juidx++] = i; 153 } 154 } 155 156 #if defined(PETSC_USE_INFO) 157 if (ai[mbs] != 0) { 158 PetscReal af = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]); 159 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 160 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 161 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 162 PetscCall(PetscInfo(A, "for best performance.\n")); 163 } else { 164 PetscCall(PetscInfo(A, "Empty matrix\n")); 165 } 166 #endif 167 168 PetscCall(ISRestoreIndices(perm, &rip)); 169 PetscCall(PetscFree2(jl, q)); 170 171 /* put together the new matrix */ 172 PetscCall(MatSeqSBAIJSetPreallocation(F, bs, MAT_SKIP_ALLOCATION, NULL)); 173 174 /* PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)iperm)); */ 175 b = (Mat_SeqSBAIJ *)(F)->data; 176 b->singlemalloc = PETSC_FALSE; 177 b->free_a = PETSC_TRUE; 178 b->free_ij = PETSC_TRUE; 179 180 PetscCall(PetscMalloc1((iu[mbs] + 1) * bs2, &b->a)); 181 b->j = ju; 182 b->i = iu; 183 b->diag = NULL; 184 b->ilen = NULL; 185 b->imax = NULL; 186 b->row = perm; 187 188 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 189 190 PetscCall(PetscObjectReference((PetscObject)perm)); 191 192 b->icol = perm; 193 PetscCall(PetscObjectReference((PetscObject)perm)); 194 PetscCall(PetscMalloc1(bs * mbs + bs, &b->solve_work)); 195 /* In b structure: Free imax, ilen, old a, old j. 196 Allocate idnew, solve_work, new a, new j */ 197 PetscCall(PetscLogObjectMemory((PetscObject)F, (iu[mbs] - mbs) * (sizeof(PetscInt) + sizeof(MatScalar)))); 198 b->maxnz = b->nz = iu[mbs]; 199 200 (F)->info.factor_mallocs = reallocs; 201 (F)->info.fill_ratio_given = f; 202 if (ai[mbs] != 0) { 203 (F)->info.fill_ratio_needed = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]); 204 } else { 205 (F)->info.fill_ratio_needed = 0.0; 206 } 207 PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(F, perm_identity)); 208 PetscFunctionReturn(0); 209 } 210 /* 211 Symbolic U^T*D*U factorization for SBAIJ format. 212 See MatICCFactorSymbolic_SeqAIJ() for description of its data structure. 213 */ 214 #include <petscbt.h> 215 #include <../src/mat/utils/freespace.h> 216 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) { 217 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 218 Mat_SeqSBAIJ *b; 219 PetscBool perm_identity, missing; 220 PetscReal fill = info->fill; 221 const PetscInt *rip, *ai = a->i, *aj = a->j; 222 PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow; 223 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 224 PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr, *udiag; 225 PetscFreeSpaceList free_space = NULL, current_space = NULL; 226 PetscBT lnkbt; 227 228 PetscFunctionBegin; 229 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 230 PetscCall(MatMissingDiagonal(A, &missing, &i)); 231 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 232 if (bs > 1) { 233 PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact, A, perm, info)); 234 PetscFunctionReturn(0); 235 } 236 237 /* check whether perm is the identity mapping */ 238 PetscCall(ISIdentity(perm, &perm_identity)); 239 PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format"); 240 a->permute = PETSC_FALSE; 241 PetscCall(ISGetIndices(perm, &rip)); 242 243 /* initialization */ 244 PetscCall(PetscMalloc1(mbs + 1, &ui)); 245 PetscCall(PetscMalloc1(mbs + 1, &udiag)); 246 ui[0] = 0; 247 248 /* jl: linked list for storing indices of the pivot rows 249 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 250 PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols)); 251 for (i = 0; i < mbs; i++) { 252 jl[i] = mbs; 253 il[i] = 0; 254 } 255 256 /* create and initialize a linked list for storing column indices of the active row k */ 257 nlnk = mbs + 1; 258 PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt)); 259 260 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 261 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space)); 262 current_space = free_space; 263 264 for (k = 0; k < mbs; k++) { /* for each active row k */ 265 /* initialize lnk by the column indices of row rip[k] of A */ 266 nzk = 0; 267 ncols = ai[k + 1] - ai[k]; 268 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row %" PetscInt_FMT " in matrix ", k); 269 for (j = 0; j < ncols; j++) { 270 i = *(aj + ai[k] + j); 271 cols[j] = i; 272 } 273 PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt)); 274 nzk += nlnk; 275 276 /* update lnk by computing fill-in for each pivot row to be merged in */ 277 prow = jl[k]; /* 1st pivot row */ 278 279 while (prow < k) { 280 nextprow = jl[prow]; 281 /* merge prow into k-th row */ 282 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 283 jmax = ui[prow + 1]; 284 ncols = jmax - jmin; 285 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 286 PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt)); 287 nzk += nlnk; 288 289 /* update il and jl for prow */ 290 if (jmin < jmax) { 291 il[prow] = jmin; 292 j = *uj_ptr; 293 jl[prow] = jl[j]; 294 jl[j] = prow; 295 } 296 prow = nextprow; 297 } 298 299 /* if free space is not available, make more free space */ 300 if (current_space->local_remaining < nzk) { 301 i = mbs - k + 1; /* num of unfactored rows */ 302 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 303 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 304 reallocs++; 305 } 306 307 /* copy data into free space, then initialize lnk */ 308 PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt)); 309 310 /* add the k-th row into il and jl */ 311 if (nzk > 1) { 312 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 313 jl[k] = jl[i]; 314 jl[i] = k; 315 il[k] = ui[k] + 1; 316 } 317 ui_ptr[k] = current_space->array; 318 319 current_space->array += nzk; 320 current_space->local_used += nzk; 321 current_space->local_remaining -= nzk; 322 323 ui[k + 1] = ui[k] + nzk; 324 } 325 326 PetscCall(ISRestoreIndices(perm, &rip)); 327 PetscCall(PetscFree4(ui_ptr, il, jl, cols)); 328 329 /* destroy list of free space and other temporary array(s) */ 330 PetscCall(PetscMalloc1(ui[mbs] + 1, &uj)); 331 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, mbs, ui, udiag)); /* store matrix factor */ 332 PetscCall(PetscLLDestroy(lnk, lnkbt)); 333 334 /* put together the new matrix in MATSEQSBAIJ format */ 335 PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL)); 336 337 b = (Mat_SeqSBAIJ *)fact->data; 338 b->singlemalloc = PETSC_FALSE; 339 b->free_a = PETSC_TRUE; 340 b->free_ij = PETSC_TRUE; 341 342 PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a)); 343 344 b->j = uj; 345 b->i = ui; 346 b->diag = udiag; 347 b->free_diag = PETSC_TRUE; 348 b->ilen = NULL; 349 b->imax = NULL; 350 b->row = perm; 351 b->icol = perm; 352 353 PetscCall(PetscObjectReference((PetscObject)perm)); 354 PetscCall(PetscObjectReference((PetscObject)perm)); 355 356 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 357 358 PetscCall(PetscMalloc1(mbs + 1, &b->solve_work)); 359 PetscCall(PetscLogObjectMemory((PetscObject)fact, ui[mbs] * (sizeof(PetscInt) + sizeof(MatScalar)))); 360 361 b->maxnz = b->nz = ui[mbs]; 362 363 fact->info.factor_mallocs = reallocs; 364 fact->info.fill_ratio_given = fill; 365 if (ai[mbs] != 0) { 366 fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs]; 367 } else { 368 fact->info.fill_ratio_needed = 0.0; 369 } 370 #if defined(PETSC_USE_INFO) 371 if (ai[mbs] != 0) { 372 PetscReal af = fact->info.fill_ratio_needed; 373 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 374 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 375 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 376 } else { 377 PetscCall(PetscInfo(A, "Empty matrix\n")); 378 } 379 #endif 380 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 381 PetscFunctionReturn(0); 382 } 383 384 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) { 385 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 386 Mat_SeqSBAIJ *b; 387 PetscBool perm_identity, missing; 388 PetscReal fill = info->fill; 389 const PetscInt *rip, *ai, *aj; 390 PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow, d; 391 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 392 PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr; 393 PetscFreeSpaceList free_space = NULL, current_space = NULL; 394 PetscBT lnkbt; 395 396 PetscFunctionBegin; 397 PetscCall(MatMissingDiagonal(A, &missing, &d)); 398 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 399 400 /* 401 This code originally uses Modified Sparse Row (MSR) storage 402 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choise! 403 Then it is rewritten so the factor B takes seqsbaij format. However the associated 404 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity, 405 thus the original code in MSR format is still used for these cases. 406 The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever 407 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 408 */ 409 if (bs > 1) { 410 PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact, A, perm, info)); 411 PetscFunctionReturn(0); 412 } 413 414 /* check whether perm is the identity mapping */ 415 PetscCall(ISIdentity(perm, &perm_identity)); 416 PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format"); 417 a->permute = PETSC_FALSE; 418 ai = a->i; 419 aj = a->j; 420 PetscCall(ISGetIndices(perm, &rip)); 421 422 /* initialization */ 423 PetscCall(PetscMalloc1(mbs + 1, &ui)); 424 ui[0] = 0; 425 426 /* jl: linked list for storing indices of the pivot rows 427 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 428 PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols)); 429 for (i = 0; i < mbs; i++) { 430 jl[i] = mbs; 431 il[i] = 0; 432 } 433 434 /* create and initialize a linked list for storing column indices of the active row k */ 435 nlnk = mbs + 1; 436 PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt)); 437 438 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 439 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space)); 440 current_space = free_space; 441 442 for (k = 0; k < mbs; k++) { /* for each active row k */ 443 /* initialize lnk by the column indices of row rip[k] of A */ 444 nzk = 0; 445 ncols = ai[rip[k] + 1] - ai[rip[k]]; 446 for (j = 0; j < ncols; j++) { 447 i = *(aj + ai[rip[k]] + j); 448 cols[j] = rip[i]; 449 } 450 PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt)); 451 nzk += nlnk; 452 453 /* update lnk by computing fill-in for each pivot row to be merged in */ 454 prow = jl[k]; /* 1st pivot row */ 455 456 while (prow < k) { 457 nextprow = jl[prow]; 458 /* merge prow into k-th row */ 459 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 460 jmax = ui[prow + 1]; 461 ncols = jmax - jmin; 462 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 463 PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt)); 464 nzk += nlnk; 465 466 /* update il and jl for prow */ 467 if (jmin < jmax) { 468 il[prow] = jmin; 469 470 j = *uj_ptr; 471 jl[prow] = jl[j]; 472 jl[j] = prow; 473 } 474 prow = nextprow; 475 } 476 477 /* if free space is not available, make more free space */ 478 if (current_space->local_remaining < nzk) { 479 i = mbs - k + 1; /* num of unfactored rows */ 480 i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 481 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 482 reallocs++; 483 } 484 485 /* copy data into free space, then initialize lnk */ 486 PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt)); 487 488 /* add the k-th row into il and jl */ 489 if (nzk - 1 > 0) { 490 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 491 jl[k] = jl[i]; 492 jl[i] = k; 493 il[k] = ui[k] + 1; 494 } 495 ui_ptr[k] = current_space->array; 496 497 current_space->array += nzk; 498 current_space->local_used += nzk; 499 current_space->local_remaining -= nzk; 500 501 ui[k + 1] = ui[k] + nzk; 502 } 503 504 PetscCall(ISRestoreIndices(perm, &rip)); 505 PetscCall(PetscFree4(ui_ptr, il, jl, cols)); 506 507 /* destroy list of free space and other temporary array(s) */ 508 PetscCall(PetscMalloc1(ui[mbs] + 1, &uj)); 509 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 510 PetscCall(PetscLLDestroy(lnk, lnkbt)); 511 512 /* put together the new matrix in MATSEQSBAIJ format */ 513 PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL)); 514 515 b = (Mat_SeqSBAIJ *)fact->data; 516 b->singlemalloc = PETSC_FALSE; 517 b->free_a = PETSC_TRUE; 518 b->free_ij = PETSC_TRUE; 519 520 PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a)); 521 522 b->j = uj; 523 b->i = ui; 524 b->diag = NULL; 525 b->ilen = NULL; 526 b->imax = NULL; 527 b->row = perm; 528 529 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 530 531 PetscCall(PetscObjectReference((PetscObject)perm)); 532 b->icol = perm; 533 PetscCall(PetscObjectReference((PetscObject)perm)); 534 PetscCall(PetscMalloc1(mbs + 1, &b->solve_work)); 535 PetscCall(PetscLogObjectMemory((PetscObject)fact, (ui[mbs] - mbs) * (sizeof(PetscInt) + sizeof(MatScalar)))); 536 b->maxnz = b->nz = ui[mbs]; 537 538 fact->info.factor_mallocs = reallocs; 539 fact->info.fill_ratio_given = fill; 540 if (ai[mbs] != 0) { 541 fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs]; 542 } else { 543 fact->info.fill_ratio_needed = 0.0; 544 } 545 #if defined(PETSC_USE_INFO) 546 if (ai[mbs] != 0) { 547 PetscReal af = fact->info.fill_ratio_needed; 548 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 549 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 550 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 551 } else { 552 PetscCall(PetscInfo(A, "Empty matrix\n")); 553 } 554 #endif 555 PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(fact, perm_identity)); 556 PetscFunctionReturn(0); 557 } 558 559 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C, Mat A, const MatFactorInfo *info) { 560 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 561 IS perm = b->row; 562 const PetscInt *ai, *aj, *perm_ptr, mbs = a->mbs, *bi = b->i, *bj = b->j; 563 PetscInt i, j; 564 PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 565 PetscInt bs = A->rmap->bs, bs2 = a->bs2; 566 MatScalar *ba = b->a, *aa, *ap, *dk, *uik; 567 MatScalar *u, *diag, *rtmp, *rtmp_ptr; 568 MatScalar *work; 569 PetscInt *pivots; 570 PetscBool allowzeropivot, zeropivotdetected; 571 572 PetscFunctionBegin; 573 /* initialization */ 574 PetscCall(PetscCalloc1(bs2 * mbs, &rtmp)); 575 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 576 allowzeropivot = PetscNot(A->erroriffailure); 577 578 il[0] = 0; 579 for (i = 0; i < mbs; i++) jl[i] = mbs; 580 581 PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work)); 582 PetscCall(PetscMalloc1(bs, &pivots)); 583 584 PetscCall(ISGetIndices(perm, &perm_ptr)); 585 586 /* check permutation */ 587 if (!a->permute) { 588 ai = a->i; 589 aj = a->j; 590 aa = a->a; 591 } else { 592 ai = a->inew; 593 aj = a->jnew; 594 PetscCall(PetscMalloc1(bs2 * ai[mbs], &aa)); 595 PetscCall(PetscArraycpy(aa, a->a, bs2 * ai[mbs])); 596 PetscCall(PetscMalloc1(ai[mbs], &a2anew)); 597 PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs])); 598 599 for (i = 0; i < mbs; i++) { 600 jmin = ai[i]; 601 jmax = ai[i + 1]; 602 for (j = jmin; j < jmax; j++) { 603 while (a2anew[j] != j) { 604 k = a2anew[j]; 605 a2anew[j] = a2anew[k]; 606 a2anew[k] = k; 607 for (k1 = 0; k1 < bs2; k1++) { 608 dk[k1] = aa[k * bs2 + k1]; 609 aa[k * bs2 + k1] = aa[j * bs2 + k1]; 610 aa[j * bs2 + k1] = dk[k1]; 611 } 612 } 613 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 614 if (i > aj[j]) { 615 ap = aa + j * bs2; /* ptr to the beginning of j-th block of aa */ 616 for (k = 0; k < bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 617 for (k = 0; k < bs; k++) { /* j-th block of aa <- dk^T */ 618 for (k1 = 0; k1 < bs; k1++) *ap++ = dk[k + bs * k1]; 619 } 620 } 621 } 622 } 623 PetscCall(PetscFree(a2anew)); 624 } 625 626 /* for each row k */ 627 for (k = 0; k < mbs; k++) { 628 /*initialize k-th row with elements nonzero in row perm(k) of A */ 629 jmin = ai[perm_ptr[k]]; 630 jmax = ai[perm_ptr[k] + 1]; 631 632 ap = aa + jmin * bs2; 633 for (j = jmin; j < jmax; j++) { 634 vj = perm_ptr[aj[j]]; /* block col. index */ 635 rtmp_ptr = rtmp + vj * bs2; 636 for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++; 637 } 638 639 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 640 PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2)); 641 i = jl[k]; /* first row to be added to k_th row */ 642 643 while (i < k) { 644 nexti = jl[i]; /* next row to be added to k_th row */ 645 646 /* compute multiplier */ 647 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 648 649 /* uik = -inv(Di)*U_bar(i,k) */ 650 diag = ba + i * bs2; 651 u = ba + ili * bs2; 652 PetscCall(PetscArrayzero(uik, bs2)); 653 PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u); 654 655 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 656 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u); 657 PetscCall(PetscLogFlops(4.0 * bs * bs2)); 658 659 /* update -U(i,k) */ 660 PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2)); 661 662 /* add multiple of row i to k-th row ... */ 663 jmin = ili + 1; 664 jmax = bi[i + 1]; 665 if (jmin < jmax) { 666 for (j = jmin; j < jmax; j++) { 667 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 668 rtmp_ptr = rtmp + bj[j] * bs2; 669 u = ba + j * bs2; 670 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u); 671 } 672 PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin))); 673 674 /* ... add i to row list for next nonzero entry */ 675 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 676 j = bj[jmin]; 677 jl[i] = jl[j]; 678 jl[j] = i; /* update jl */ 679 } 680 i = nexti; 681 } 682 683 /* save nonzero entries in k-th row of U ... */ 684 685 /* invert diagonal block */ 686 diag = ba + k * bs2; 687 PetscCall(PetscArraycpy(diag, dk, bs2)); 688 689 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected)); 690 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 691 692 jmin = bi[k]; 693 jmax = bi[k + 1]; 694 if (jmin < jmax) { 695 for (j = jmin; j < jmax; j++) { 696 vj = bj[j]; /* block col. index of U */ 697 u = ba + j * bs2; 698 rtmp_ptr = rtmp + vj * bs2; 699 for (k1 = 0; k1 < bs2; k1++) { 700 *u++ = *rtmp_ptr; 701 *rtmp_ptr++ = 0.0; 702 } 703 } 704 705 /* ... add k to row list for first nonzero entry in k-th row */ 706 il[k] = jmin; 707 i = bj[jmin]; 708 jl[k] = jl[i]; 709 jl[i] = k; 710 } 711 } 712 713 PetscCall(PetscFree(rtmp)); 714 PetscCall(PetscFree2(il, jl)); 715 PetscCall(PetscFree3(dk, uik, work)); 716 PetscCall(PetscFree(pivots)); 717 if (a->permute) PetscCall(PetscFree(aa)); 718 719 PetscCall(ISRestoreIndices(perm, &perm_ptr)); 720 721 C->ops->solve = MatSolve_SeqSBAIJ_N_inplace; 722 C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace; 723 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_inplace; 724 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_inplace; 725 726 C->assembled = PETSC_TRUE; 727 C->preallocated = PETSC_TRUE; 728 729 PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */ 730 PetscFunctionReturn(0); 731 } 732 733 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info) { 734 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 735 PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j; 736 PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 737 PetscInt bs = A->rmap->bs, bs2 = a->bs2; 738 MatScalar *ba = b->a, *aa, *ap, *dk, *uik; 739 MatScalar *u, *diag, *rtmp, *rtmp_ptr; 740 MatScalar *work; 741 PetscInt *pivots; 742 PetscBool allowzeropivot, zeropivotdetected; 743 744 PetscFunctionBegin; 745 PetscCall(PetscCalloc1(bs2 * mbs, &rtmp)); 746 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 747 il[0] = 0; 748 for (i = 0; i < mbs; i++) jl[i] = mbs; 749 750 PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work)); 751 PetscCall(PetscMalloc1(bs, &pivots)); 752 allowzeropivot = PetscNot(A->erroriffailure); 753 754 ai = a->i; 755 aj = a->j; 756 aa = a->a; 757 758 /* for each row k */ 759 for (k = 0; k < mbs; k++) { 760 /*initialize k-th row with elements nonzero in row k of A */ 761 jmin = ai[k]; 762 jmax = ai[k + 1]; 763 ap = aa + jmin * bs2; 764 for (j = jmin; j < jmax; j++) { 765 vj = aj[j]; /* block col. index */ 766 rtmp_ptr = rtmp + vj * bs2; 767 for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++; 768 } 769 770 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 771 PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2)); 772 i = jl[k]; /* first row to be added to k_th row */ 773 774 while (i < k) { 775 nexti = jl[i]; /* next row to be added to k_th row */ 776 777 /* compute multiplier */ 778 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 779 780 /* uik = -inv(Di)*U_bar(i,k) */ 781 diag = ba + i * bs2; 782 u = ba + ili * bs2; 783 PetscCall(PetscArrayzero(uik, bs2)); 784 PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u); 785 786 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 787 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u); 788 PetscCall(PetscLogFlops(2.0 * bs * bs2)); 789 790 /* update -U(i,k) */ 791 PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2)); 792 793 /* add multiple of row i to k-th row ... */ 794 jmin = ili + 1; 795 jmax = bi[i + 1]; 796 if (jmin < jmax) { 797 for (j = jmin; j < jmax; j++) { 798 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 799 rtmp_ptr = rtmp + bj[j] * bs2; 800 u = ba + j * bs2; 801 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u); 802 } 803 PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin))); 804 805 /* ... add i to row list for next nonzero entry */ 806 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 807 j = bj[jmin]; 808 jl[i] = jl[j]; 809 jl[j] = i; /* update jl */ 810 } 811 i = nexti; 812 } 813 814 /* save nonzero entries in k-th row of U ... */ 815 816 /* invert diagonal block */ 817 diag = ba + k * bs2; 818 PetscCall(PetscArraycpy(diag, dk, bs2)); 819 820 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected)); 821 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 822 823 jmin = bi[k]; 824 jmax = bi[k + 1]; 825 if (jmin < jmax) { 826 for (j = jmin; j < jmax; j++) { 827 vj = bj[j]; /* block col. index of U */ 828 u = ba + j * bs2; 829 rtmp_ptr = rtmp + vj * bs2; 830 for (k1 = 0; k1 < bs2; k1++) { 831 *u++ = *rtmp_ptr; 832 *rtmp_ptr++ = 0.0; 833 } 834 } 835 836 /* ... add k to row list for first nonzero entry in k-th row */ 837 il[k] = jmin; 838 i = bj[jmin]; 839 jl[k] = jl[i]; 840 jl[i] = k; 841 } 842 } 843 844 PetscCall(PetscFree(rtmp)); 845 PetscCall(PetscFree2(il, jl)); 846 PetscCall(PetscFree3(dk, uik, work)); 847 PetscCall(PetscFree(pivots)); 848 849 C->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 850 C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 851 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 852 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 853 C->assembled = PETSC_TRUE; 854 C->preallocated = PETSC_TRUE; 855 856 PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */ 857 PetscFunctionReturn(0); 858 } 859 860 /* 861 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 862 Version for blocks 2 by 2. 863 */ 864 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C, Mat A, const MatFactorInfo *info) { 865 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 866 IS perm = b->row; 867 const PetscInt *ai, *aj, *perm_ptr; 868 PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j; 869 PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 870 MatScalar *ba = b->a, *aa, *ap; 871 MatScalar *u, *diag, *rtmp, *rtmp_ptr, dk[4], uik[4]; 872 PetscReal shift = info->shiftamount; 873 PetscBool allowzeropivot, zeropivotdetected; 874 875 PetscFunctionBegin; 876 allowzeropivot = PetscNot(A->erroriffailure); 877 878 /* initialization */ 879 /* il and jl record the first nonzero element in each row of the accessing 880 window U(0:k, k:mbs-1). 881 jl: list of rows to be added to uneliminated rows 882 i>= k: jl(i) is the first row to be added to row i 883 i< k: jl(i) is the row following row i in some list of rows 884 jl(i) = mbs indicates the end of a list 885 il(i): points to the first nonzero element in columns k,...,mbs-1 of 886 row i of U */ 887 PetscCall(PetscCalloc1(4 * mbs, &rtmp)); 888 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 889 il[0] = 0; 890 for (i = 0; i < mbs; i++) jl[i] = mbs; 891 892 PetscCall(ISGetIndices(perm, &perm_ptr)); 893 894 /* check permutation */ 895 if (!a->permute) { 896 ai = a->i; 897 aj = a->j; 898 aa = a->a; 899 } else { 900 ai = a->inew; 901 aj = a->jnew; 902 PetscCall(PetscMalloc1(4 * ai[mbs], &aa)); 903 PetscCall(PetscArraycpy(aa, a->a, 4 * ai[mbs])); 904 PetscCall(PetscMalloc1(ai[mbs], &a2anew)); 905 PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs])); 906 907 for (i = 0; i < mbs; i++) { 908 jmin = ai[i]; 909 jmax = ai[i + 1]; 910 for (j = jmin; j < jmax; j++) { 911 while (a2anew[j] != j) { 912 k = a2anew[j]; 913 a2anew[j] = a2anew[k]; 914 a2anew[k] = k; 915 for (k1 = 0; k1 < 4; k1++) { 916 dk[k1] = aa[k * 4 + k1]; 917 aa[k * 4 + k1] = aa[j * 4 + k1]; 918 aa[j * 4 + k1] = dk[k1]; 919 } 920 } 921 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 922 if (i > aj[j]) { 923 ap = aa + j * 4; /* ptr to the beginning of the block */ 924 dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 925 ap[1] = ap[2]; 926 ap[2] = dk[1]; 927 } 928 } 929 } 930 PetscCall(PetscFree(a2anew)); 931 } 932 933 /* for each row k */ 934 for (k = 0; k < mbs; k++) { 935 /*initialize k-th row with elements nonzero in row perm(k) of A */ 936 jmin = ai[perm_ptr[k]]; 937 jmax = ai[perm_ptr[k] + 1]; 938 ap = aa + jmin * 4; 939 for (j = jmin; j < jmax; j++) { 940 vj = perm_ptr[aj[j]]; /* block col. index */ 941 rtmp_ptr = rtmp + vj * 4; 942 for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++; 943 } 944 945 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 946 PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4)); 947 i = jl[k]; /* first row to be added to k_th row */ 948 949 while (i < k) { 950 nexti = jl[i]; /* next row to be added to k_th row */ 951 952 /* compute multiplier */ 953 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 954 955 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 956 diag = ba + i * 4; 957 u = ba + ili * 4; 958 uik[0] = -(diag[0] * u[0] + diag[2] * u[1]); 959 uik[1] = -(diag[1] * u[0] + diag[3] * u[1]); 960 uik[2] = -(diag[0] * u[2] + diag[2] * u[3]); 961 uik[3] = -(diag[1] * u[2] + diag[3] * u[3]); 962 963 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 964 dk[0] += uik[0] * u[0] + uik[1] * u[1]; 965 dk[1] += uik[2] * u[0] + uik[3] * u[1]; 966 dk[2] += uik[0] * u[2] + uik[1] * u[3]; 967 dk[3] += uik[2] * u[2] + uik[3] * u[3]; 968 969 PetscCall(PetscLogFlops(16.0 * 2.0)); 970 971 /* update -U(i,k): ba[ili] = uik */ 972 PetscCall(PetscArraycpy(ba + ili * 4, uik, 4)); 973 974 /* add multiple of row i to k-th row ... */ 975 jmin = ili + 1; 976 jmax = bi[i + 1]; 977 if (jmin < jmax) { 978 for (j = jmin; j < jmax; j++) { 979 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 980 rtmp_ptr = rtmp + bj[j] * 4; 981 u = ba + j * 4; 982 rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1]; 983 rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1]; 984 rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3]; 985 rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3]; 986 } 987 PetscCall(PetscLogFlops(16.0 * (jmax - jmin))); 988 989 /* ... add i to row list for next nonzero entry */ 990 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 991 j = bj[jmin]; 992 jl[i] = jl[j]; 993 jl[j] = i; /* update jl */ 994 } 995 i = nexti; 996 } 997 998 /* save nonzero entries in k-th row of U ... */ 999 1000 /* invert diagonal block */ 1001 diag = ba + k * 4; 1002 PetscCall(PetscArraycpy(diag, dk, 4)); 1003 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 1004 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1005 1006 jmin = bi[k]; 1007 jmax = bi[k + 1]; 1008 if (jmin < jmax) { 1009 for (j = jmin; j < jmax; j++) { 1010 vj = bj[j]; /* block col. index of U */ 1011 u = ba + j * 4; 1012 rtmp_ptr = rtmp + vj * 4; 1013 for (k1 = 0; k1 < 4; k1++) { 1014 *u++ = *rtmp_ptr; 1015 *rtmp_ptr++ = 0.0; 1016 } 1017 } 1018 1019 /* ... add k to row list for first nonzero entry in k-th row */ 1020 il[k] = jmin; 1021 i = bj[jmin]; 1022 jl[k] = jl[i]; 1023 jl[i] = k; 1024 } 1025 } 1026 1027 PetscCall(PetscFree(rtmp)); 1028 PetscCall(PetscFree2(il, jl)); 1029 if (a->permute) PetscCall(PetscFree(aa)); 1030 PetscCall(ISRestoreIndices(perm, &perm_ptr)); 1031 1032 C->ops->solve = MatSolve_SeqSBAIJ_2_inplace; 1033 C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace; 1034 C->assembled = PETSC_TRUE; 1035 C->preallocated = PETSC_TRUE; 1036 1037 PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */ 1038 PetscFunctionReturn(0); 1039 } 1040 1041 /* 1042 Version for when blocks are 2 by 2 Using natural ordering 1043 */ 1044 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info) { 1045 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 1046 PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j; 1047 PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 1048 MatScalar *ba = b->a, *aa, *ap, dk[8], uik[8]; 1049 MatScalar *u, *diag, *rtmp, *rtmp_ptr; 1050 PetscReal shift = info->shiftamount; 1051 PetscBool allowzeropivot, zeropivotdetected; 1052 1053 PetscFunctionBegin; 1054 allowzeropivot = PetscNot(A->erroriffailure); 1055 1056 /* initialization */ 1057 /* il and jl record the first nonzero element in each row of the accessing 1058 window U(0:k, k:mbs-1). 1059 jl: list of rows to be added to uneliminated rows 1060 i>= k: jl(i) is the first row to be added to row i 1061 i< k: jl(i) is the row following row i in some list of rows 1062 jl(i) = mbs indicates the end of a list 1063 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1064 row i of U */ 1065 PetscCall(PetscCalloc1(4 * mbs, &rtmp)); 1066 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 1067 il[0] = 0; 1068 for (i = 0; i < mbs; i++) jl[i] = mbs; 1069 1070 ai = a->i; 1071 aj = a->j; 1072 aa = a->a; 1073 1074 /* for each row k */ 1075 for (k = 0; k < mbs; k++) { 1076 /*initialize k-th row with elements nonzero in row k of A */ 1077 jmin = ai[k]; 1078 jmax = ai[k + 1]; 1079 ap = aa + jmin * 4; 1080 for (j = jmin; j < jmax; j++) { 1081 vj = aj[j]; /* block col. index */ 1082 rtmp_ptr = rtmp + vj * 4; 1083 for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++; 1084 } 1085 1086 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1087 PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4)); 1088 i = jl[k]; /* first row to be added to k_th row */ 1089 1090 while (i < k) { 1091 nexti = jl[i]; /* next row to be added to k_th row */ 1092 1093 /* compute multiplier */ 1094 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1095 1096 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 1097 diag = ba + i * 4; 1098 u = ba + ili * 4; 1099 uik[0] = -(diag[0] * u[0] + diag[2] * u[1]); 1100 uik[1] = -(diag[1] * u[0] + diag[3] * u[1]); 1101 uik[2] = -(diag[0] * u[2] + diag[2] * u[3]); 1102 uik[3] = -(diag[1] * u[2] + diag[3] * u[3]); 1103 1104 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 1105 dk[0] += uik[0] * u[0] + uik[1] * u[1]; 1106 dk[1] += uik[2] * u[0] + uik[3] * u[1]; 1107 dk[2] += uik[0] * u[2] + uik[1] * u[3]; 1108 dk[3] += uik[2] * u[2] + uik[3] * u[3]; 1109 1110 PetscCall(PetscLogFlops(16.0 * 2.0)); 1111 1112 /* update -U(i,k): ba[ili] = uik */ 1113 PetscCall(PetscArraycpy(ba + ili * 4, uik, 4)); 1114 1115 /* add multiple of row i to k-th row ... */ 1116 jmin = ili + 1; 1117 jmax = bi[i + 1]; 1118 if (jmin < jmax) { 1119 for (j = jmin; j < jmax; j++) { 1120 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 1121 rtmp_ptr = rtmp + bj[j] * 4; 1122 u = ba + j * 4; 1123 rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1]; 1124 rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1]; 1125 rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3]; 1126 rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3]; 1127 } 1128 PetscCall(PetscLogFlops(16.0 * (jmax - jmin))); 1129 1130 /* ... add i to row list for next nonzero entry */ 1131 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1132 j = bj[jmin]; 1133 jl[i] = jl[j]; 1134 jl[j] = i; /* update jl */ 1135 } 1136 i = nexti; 1137 } 1138 1139 /* save nonzero entries in k-th row of U ... */ 1140 1141 /* invert diagonal block */ 1142 diag = ba + k * 4; 1143 PetscCall(PetscArraycpy(diag, dk, 4)); 1144 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 1145 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1146 1147 jmin = bi[k]; 1148 jmax = bi[k + 1]; 1149 if (jmin < jmax) { 1150 for (j = jmin; j < jmax; j++) { 1151 vj = bj[j]; /* block col. index of U */ 1152 u = ba + j * 4; 1153 rtmp_ptr = rtmp + vj * 4; 1154 for (k1 = 0; k1 < 4; k1++) { 1155 *u++ = *rtmp_ptr; 1156 *rtmp_ptr++ = 0.0; 1157 } 1158 } 1159 1160 /* ... add k to row list for first nonzero entry in k-th row */ 1161 il[k] = jmin; 1162 i = bj[jmin]; 1163 jl[k] = jl[i]; 1164 jl[i] = k; 1165 } 1166 } 1167 1168 PetscCall(PetscFree(rtmp)); 1169 PetscCall(PetscFree2(il, jl)); 1170 1171 C->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1172 C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1173 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1174 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1175 C->assembled = PETSC_TRUE; 1176 C->preallocated = PETSC_TRUE; 1177 1178 PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */ 1179 PetscFunctionReturn(0); 1180 } 1181 1182 /* 1183 Numeric U^T*D*U factorization for SBAIJ format. 1184 Version for blocks are 1 by 1. 1185 */ 1186 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info) { 1187 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 1188 IS ip = b->row; 1189 const PetscInt *ai, *aj, *rip; 1190 PetscInt *a2anew, i, j, mbs = a->mbs, *bi = b->i, *bj = b->j, *bcol; 1191 PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 1192 MatScalar *rtmp, *ba = b->a, *bval, *aa, dk, uikdi; 1193 PetscReal rs; 1194 FactorShiftCtx sctx; 1195 1196 PetscFunctionBegin; 1197 /* MatPivotSetUp(): initialize shift context sctx */ 1198 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1199 1200 PetscCall(ISGetIndices(ip, &rip)); 1201 if (!a->permute) { 1202 ai = a->i; 1203 aj = a->j; 1204 aa = a->a; 1205 } else { 1206 ai = a->inew; 1207 aj = a->jnew; 1208 nz = ai[mbs]; 1209 PetscCall(PetscMalloc1(nz, &aa)); 1210 a2anew = a->a2anew; 1211 bval = a->a; 1212 for (j = 0; j < nz; j++) { aa[a2anew[j]] = *(bval++); } 1213 } 1214 1215 /* initialization */ 1216 /* il and jl record the first nonzero element in each row of the accessing 1217 window U(0:k, k:mbs-1). 1218 jl: list of rows to be added to uneliminated rows 1219 i>= k: jl(i) is the first row to be added to row i 1220 i< k: jl(i) is the row following row i in some list of rows 1221 jl(i) = mbs indicates the end of a list 1222 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1223 row i of U */ 1224 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 1225 1226 do { 1227 sctx.newshift = PETSC_FALSE; 1228 il[0] = 0; 1229 for (i = 0; i < mbs; i++) { 1230 rtmp[i] = 0.0; 1231 jl[i] = mbs; 1232 } 1233 1234 for (k = 0; k < mbs; k++) { 1235 /*initialize k-th row by the perm[k]-th row of A */ 1236 jmin = ai[rip[k]]; 1237 jmax = ai[rip[k] + 1]; 1238 bval = ba + bi[k]; 1239 for (j = jmin; j < jmax; j++) { 1240 col = rip[aj[j]]; 1241 rtmp[col] = aa[j]; 1242 *bval++ = 0.0; /* for in-place factorization */ 1243 } 1244 1245 /* shift the diagonal of the matrix */ 1246 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1247 1248 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1249 dk = rtmp[k]; 1250 i = jl[k]; /* first row to be added to k_th row */ 1251 1252 while (i < k) { 1253 nexti = jl[i]; /* next row to be added to k_th row */ 1254 1255 /* compute multiplier, update diag(k) and U(i,k) */ 1256 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1257 uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 1258 dk += uikdi * ba[ili]; 1259 ba[ili] = uikdi; /* -U(i,k) */ 1260 1261 /* add multiple of row i to k-th row */ 1262 jmin = ili + 1; 1263 jmax = bi[i + 1]; 1264 if (jmin < jmax) { 1265 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 1266 PetscCall(PetscLogFlops(2.0 * (jmax - jmin))); 1267 1268 /* update il and jl for row i */ 1269 il[i] = jmin; 1270 j = bj[jmin]; 1271 jl[i] = jl[j]; 1272 jl[j] = i; 1273 } 1274 i = nexti; 1275 } 1276 1277 /* shift the diagonals when zero pivot is detected */ 1278 /* compute rs=sum of abs(off-diagonal) */ 1279 rs = 0.0; 1280 jmin = bi[k] + 1; 1281 nz = bi[k + 1] - jmin; 1282 if (nz) { 1283 bcol = bj + jmin; 1284 while (nz--) { 1285 rs += PetscAbsScalar(rtmp[*bcol]); 1286 bcol++; 1287 } 1288 } 1289 1290 sctx.rs = rs; 1291 sctx.pv = dk; 1292 PetscCall(MatPivotCheck(C, A, info, &sctx, k)); 1293 if (sctx.newshift) break; /* sctx.shift_amount is updated */ 1294 dk = sctx.pv; 1295 1296 /* copy data into U(k,:) */ 1297 ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 1298 jmin = bi[k] + 1; 1299 jmax = bi[k + 1]; 1300 if (jmin < jmax) { 1301 for (j = jmin; j < jmax; j++) { 1302 col = bj[j]; 1303 ba[j] = rtmp[col]; 1304 rtmp[col] = 0.0; 1305 } 1306 /* add the k-th row into il and jl */ 1307 il[k] = jmin; 1308 i = bj[jmin]; 1309 jl[k] = jl[i]; 1310 jl[i] = k; 1311 } 1312 } 1313 } while (sctx.newshift); 1314 PetscCall(PetscFree3(rtmp, il, jl)); 1315 if (a->permute) PetscCall(PetscFree(aa)); 1316 1317 PetscCall(ISRestoreIndices(ip, &rip)); 1318 1319 C->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 1320 C->ops->solves = MatSolves_SeqSBAIJ_1_inplace; 1321 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 1322 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 1323 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 1324 C->assembled = PETSC_TRUE; 1325 C->preallocated = PETSC_TRUE; 1326 1327 PetscCall(PetscLogFlops(C->rmap->N)); 1328 if (sctx.nshift) { 1329 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1330 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1331 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1332 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1333 } 1334 } 1335 PetscFunctionReturn(0); 1336 } 1337 1338 /* 1339 Version for when blocks are 1 by 1 Using natural ordering under new datastructure 1340 Modified from MatCholeskyFactorNumeric_SeqAIJ() 1341 */ 1342 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info) { 1343 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1344 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)B->data; 1345 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp; 1346 PetscInt *ai = a->i, *aj = a->j, *ajtmp; 1347 PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz; 1348 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 1349 FactorShiftCtx sctx; 1350 PetscReal rs; 1351 MatScalar d, *v; 1352 1353 PetscFunctionBegin; 1354 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r)); 1355 1356 /* MatPivotSetUp(): initialize shift context sctx */ 1357 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1358 1359 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 1360 sctx.shift_top = info->zeropivot; 1361 1362 PetscCall(PetscArrayzero(rtmp, mbs)); 1363 1364 for (i = 0; i < mbs; i++) { 1365 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 1366 d = (aa)[a->diag[i]]; 1367 rtmp[i] += -PetscRealPart(d); /* diagonal entry */ 1368 ajtmp = aj + ai[i] + 1; /* exclude diagonal */ 1369 v = aa + ai[i] + 1; 1370 nz = ai[i + 1] - ai[i] - 1; 1371 for (j = 0; j < nz; j++) { 1372 rtmp[i] += PetscAbsScalar(v[j]); 1373 rtmp[ajtmp[j]] += PetscAbsScalar(v[j]); 1374 } 1375 if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]); 1376 } 1377 sctx.shift_top *= 1.1; 1378 sctx.nshift_max = 5; 1379 sctx.shift_lo = 0.; 1380 sctx.shift_hi = 1.; 1381 } 1382 1383 /* allocate working arrays 1384 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 1385 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 1386 */ 1387 do { 1388 sctx.newshift = PETSC_FALSE; 1389 1390 for (i = 0; i < mbs; i++) c2r[i] = mbs; 1391 if (mbs) il[0] = 0; 1392 1393 for (k = 0; k < mbs; k++) { 1394 /* zero rtmp */ 1395 nz = bi[k + 1] - bi[k]; 1396 bjtmp = bj + bi[k]; 1397 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 1398 1399 /* load in initial unfactored row */ 1400 bval = ba + bi[k]; 1401 jmin = ai[k]; 1402 jmax = ai[k + 1]; 1403 for (j = jmin; j < jmax; j++) { 1404 col = aj[j]; 1405 rtmp[col] = aa[j]; 1406 *bval++ = 0.0; /* for in-place factorization */ 1407 } 1408 /* shift the diagonal of the matrix: ZeropivotApply() */ 1409 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 1410 1411 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1412 dk = rtmp[k]; 1413 i = c2r[k]; /* first row to be added to k_th row */ 1414 1415 while (i < k) { 1416 nexti = c2r[i]; /* next row to be added to k_th row */ 1417 1418 /* compute multiplier, update diag(k) and U(i,k) */ 1419 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1420 uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */ 1421 dk += uikdi * ba[ili]; /* update diag[k] */ 1422 ba[ili] = uikdi; /* -U(i,k) */ 1423 1424 /* add multiple of row i to k-th row */ 1425 jmin = ili + 1; 1426 jmax = bi[i + 1]; 1427 if (jmin < jmax) { 1428 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 1429 /* update il and c2r for row i */ 1430 il[i] = jmin; 1431 j = bj[jmin]; 1432 c2r[i] = c2r[j]; 1433 c2r[j] = i; 1434 } 1435 i = nexti; 1436 } 1437 1438 /* copy data into U(k,:) */ 1439 rs = 0.0; 1440 jmin = bi[k]; 1441 jmax = bi[k + 1] - 1; 1442 if (jmin < jmax) { 1443 for (j = jmin; j < jmax; j++) { 1444 col = bj[j]; 1445 ba[j] = rtmp[col]; 1446 rs += PetscAbsScalar(ba[j]); 1447 } 1448 /* add the k-th row into il and c2r */ 1449 il[k] = jmin; 1450 i = bj[jmin]; 1451 c2r[k] = c2r[i]; 1452 c2r[i] = k; 1453 } 1454 1455 sctx.rs = rs; 1456 sctx.pv = dk; 1457 PetscCall(MatPivotCheck(B, A, info, &sctx, k)); 1458 if (sctx.newshift) break; 1459 dk = sctx.pv; 1460 1461 ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */ 1462 } 1463 } while (sctx.newshift); 1464 1465 PetscCall(PetscFree3(rtmp, il, c2r)); 1466 1467 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1468 B->ops->solves = MatSolves_SeqSBAIJ_1; 1469 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1470 B->ops->matsolve = MatMatSolve_SeqSBAIJ_1_NaturalOrdering; 1471 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1472 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1473 1474 B->assembled = PETSC_TRUE; 1475 B->preallocated = PETSC_TRUE; 1476 1477 PetscCall(PetscLogFlops(B->rmap->n)); 1478 1479 /* MatPivotView() */ 1480 if (sctx.nshift) { 1481 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1482 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 1483 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1484 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1485 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 1486 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 1487 } 1488 } 1489 PetscFunctionReturn(0); 1490 } 1491 1492 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info) { 1493 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 1494 PetscInt i, j, mbs = a->mbs; 1495 PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 1496 PetscInt k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz; 1497 MatScalar *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval; 1498 PetscReal rs; 1499 FactorShiftCtx sctx; 1500 1501 PetscFunctionBegin; 1502 /* MatPivotSetUp(): initialize shift context sctx */ 1503 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1504 1505 /* initialization */ 1506 /* il and jl record the first nonzero element in each row of the accessing 1507 window U(0:k, k:mbs-1). 1508 jl: list of rows to be added to uneliminated rows 1509 i>= k: jl(i) is the first row to be added to row i 1510 i< k: jl(i) is the row following row i in some list of rows 1511 jl(i) = mbs indicates the end of a list 1512 il(i): points to the first nonzero element in U(i,k:mbs-1) 1513 */ 1514 PetscCall(PetscMalloc1(mbs, &rtmp)); 1515 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 1516 1517 do { 1518 sctx.newshift = PETSC_FALSE; 1519 il[0] = 0; 1520 for (i = 0; i < mbs; i++) { 1521 rtmp[i] = 0.0; 1522 jl[i] = mbs; 1523 } 1524 1525 for (k = 0; k < mbs; k++) { 1526 /*initialize k-th row with elements nonzero in row perm(k) of A */ 1527 nz = ai[k + 1] - ai[k]; 1528 acol = aj + ai[k]; 1529 aval = aa + ai[k]; 1530 bval = ba + bi[k]; 1531 while (nz--) { 1532 rtmp[*acol++] = *aval++; 1533 *bval++ = 0.0; /* for in-place factorization */ 1534 } 1535 1536 /* shift the diagonal of the matrix */ 1537 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1538 1539 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1540 dk = rtmp[k]; 1541 i = jl[k]; /* first row to be added to k_th row */ 1542 1543 while (i < k) { 1544 nexti = jl[i]; /* next row to be added to k_th row */ 1545 /* compute multiplier, update D(k) and U(i,k) */ 1546 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1547 uikdi = -ba[ili] * ba[bi[i]]; 1548 dk += uikdi * ba[ili]; 1549 ba[ili] = uikdi; /* -U(i,k) */ 1550 1551 /* add multiple of row i to k-th row ... */ 1552 jmin = ili + 1; 1553 nz = bi[i + 1] - jmin; 1554 if (nz > 0) { 1555 bcol = bj + jmin; 1556 bval = ba + jmin; 1557 PetscCall(PetscLogFlops(2.0 * nz)); 1558 while (nz--) rtmp[*bcol++] += uikdi * (*bval++); 1559 1560 /* update il and jl for i-th row */ 1561 il[i] = jmin; 1562 j = bj[jmin]; 1563 jl[i] = jl[j]; 1564 jl[j] = i; 1565 } 1566 i = nexti; 1567 } 1568 1569 /* shift the diagonals when zero pivot is detected */ 1570 /* compute rs=sum of abs(off-diagonal) */ 1571 rs = 0.0; 1572 jmin = bi[k] + 1; 1573 nz = bi[k + 1] - jmin; 1574 if (nz) { 1575 bcol = bj + jmin; 1576 while (nz--) { 1577 rs += PetscAbsScalar(rtmp[*bcol]); 1578 bcol++; 1579 } 1580 } 1581 1582 sctx.rs = rs; 1583 sctx.pv = dk; 1584 PetscCall(MatPivotCheck(C, A, info, &sctx, k)); 1585 if (sctx.newshift) break; /* sctx.shift_amount is updated */ 1586 dk = sctx.pv; 1587 1588 /* copy data into U(k,:) */ 1589 ba[bi[k]] = 1.0 / dk; 1590 jmin = bi[k] + 1; 1591 nz = bi[k + 1] - jmin; 1592 if (nz) { 1593 bcol = bj + jmin; 1594 bval = ba + jmin; 1595 while (nz--) { 1596 *bval++ = rtmp[*bcol]; 1597 rtmp[*bcol++] = 0.0; 1598 } 1599 /* add k-th row into il and jl */ 1600 il[k] = jmin; 1601 i = bj[jmin]; 1602 jl[k] = jl[i]; 1603 jl[i] = k; 1604 } 1605 } /* end of for (k = 0; k<mbs; k++) */ 1606 } while (sctx.newshift); 1607 PetscCall(PetscFree(rtmp)); 1608 PetscCall(PetscFree2(il, jl)); 1609 1610 C->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1611 C->ops->solves = MatSolves_SeqSBAIJ_1_inplace; 1612 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1613 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1614 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1615 1616 C->assembled = PETSC_TRUE; 1617 C->preallocated = PETSC_TRUE; 1618 1619 PetscCall(PetscLogFlops(C->rmap->N)); 1620 if (sctx.nshift) { 1621 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1622 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1623 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1624 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1625 } 1626 } 1627 PetscFunctionReturn(0); 1628 } 1629 1630 PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A, IS perm, const MatFactorInfo *info) { 1631 Mat C; 1632 1633 PetscFunctionBegin; 1634 PetscCall(MatGetFactor(A, "petsc", MAT_FACTOR_CHOLESKY, &C)); 1635 PetscCall(MatCholeskyFactorSymbolic(C, A, perm, info)); 1636 PetscCall(MatCholeskyFactorNumeric(C, A, info)); 1637 1638 A->ops->solve = C->ops->solve; 1639 A->ops->solvetranspose = C->ops->solvetranspose; 1640 1641 PetscCall(MatHeaderMerge(A, &C)); 1642 PetscFunctionReturn(0); 1643 } 1644