1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 /* 8 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 9 10 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 11 */ 12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol) { 13 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->data; 14 PetscInt i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order; 15 const PetscInt *ai = a->i, *aj = a->j; 16 const PetscScalar *aa = a->a; 17 PetscBool *done; 18 PetscReal best, past = 0, future; 19 20 PetscFunctionBegin; 21 /* pick initial row */ 22 best = -1; 23 for (i = 0; i < n; i++) { 24 future = 0.0; 25 for (j = ai[i]; j < ai[i + 1]; j++) { 26 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 27 else past = PetscAbsScalar(aa[j]); 28 } 29 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 30 if (past / future > best) { 31 best = past / future; 32 current = i; 33 } 34 } 35 36 PetscCall(PetscMalloc1(n, &done)); 37 PetscCall(PetscArrayzero(done, n)); 38 PetscCall(PetscMalloc1(n, &order)); 39 order[0] = current; 40 for (i = 0; i < n - 1; i++) { 41 done[current] = PETSC_TRUE; 42 best = -1; 43 /* loop over all neighbors of current pivot */ 44 for (j = ai[current]; j < ai[current + 1]; j++) { 45 jj = aj[j]; 46 if (done[jj]) continue; 47 /* loop over columns of potential next row computing weights for below and above diagonal */ 48 past = future = 0.0; 49 for (k = ai[jj]; k < ai[jj + 1]; k++) { 50 kk = aj[k]; 51 if (done[kk]) past += PetscAbsScalar(aa[k]); 52 else if (kk != jj) future += PetscAbsScalar(aa[k]); 53 } 54 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 55 if (past / future > best) { 56 best = past / future; 57 newcurrent = jj; 58 } 59 } 60 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 61 best = -1; 62 for (k = 0; k < n; k++) { 63 if (done[k]) continue; 64 future = 0.0; 65 past = 0.0; 66 for (j = ai[k]; j < ai[k + 1]; j++) { 67 kk = aj[j]; 68 if (done[kk]) past += PetscAbsScalar(aa[j]); 69 else if (kk != k) future += PetscAbsScalar(aa[j]); 70 } 71 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 72 if (past / future > best) { 73 best = past / future; 74 newcurrent = k; 75 } 76 } 77 } 78 PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current"); 79 current = newcurrent; 80 order[i + 1] = current; 81 } 82 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow)); 83 *icol = *irow; 84 PetscCall(PetscObjectReference((PetscObject)*irow)); 85 PetscCall(PetscFree(done)); 86 PetscCall(PetscFree(order)); 87 PetscFunctionReturn(0); 88 } 89 90 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type) { 91 PetscFunctionBegin; 92 *type = MATSOLVERPETSC; 93 PetscFunctionReturn(0); 94 } 95 96 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B) { 97 PetscInt n = A->rmap->n; 98 99 PetscFunctionBegin; 100 #if defined(PETSC_USE_COMPLEX) 101 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || (ftype != MAT_FACTOR_CHOLESKY && ftype != MAT_FACTOR_ICC), PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY or ICC Factor is not supported"); 102 #endif 103 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 104 PetscCall(MatSetSizes(*B, n, n, n, n)); 105 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 106 PetscCall(MatSetType(*B, MATSEQAIJ)); 107 108 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 109 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 110 111 PetscCall(MatSetBlockSizesFromMats(*B, A, A)); 112 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU])); 113 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU])); 114 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT])); 115 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 116 PetscCall(MatSetType(*B, MATSEQSBAIJ)); 117 PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL)); 118 119 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 120 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 121 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY])); 122 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC])); 123 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported"); 124 (*B)->factortype = ftype; 125 126 PetscCall(PetscFree((*B)->solvertype)); 127 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype)); 128 (*B)->canuseordering = PETSC_TRUE; 129 PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc)); 130 PetscFunctionReturn(0); 131 } 132 133 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) { 134 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 135 IS isicol; 136 const PetscInt *r, *ic; 137 PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j; 138 PetscInt *bi, *bj, *ajtmp; 139 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 140 PetscReal f; 141 PetscInt nlnk, *lnk, k, **bi_ptr; 142 PetscFreeSpaceList free_space = NULL, current_space = NULL; 143 PetscBT lnkbt; 144 PetscBool missing; 145 146 PetscFunctionBegin; 147 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 148 PetscCall(MatMissingDiagonal(A, &missing, &i)); 149 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 150 151 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 152 PetscCall(ISGetIndices(isrow, &r)); 153 PetscCall(ISGetIndices(isicol, &ic)); 154 155 /* get new row pointers */ 156 PetscCall(PetscMalloc1(n + 1, &bi)); 157 bi[0] = 0; 158 159 /* bdiag is location of diagonal in factor */ 160 PetscCall(PetscMalloc1(n + 1, &bdiag)); 161 bdiag[0] = 0; 162 163 /* linked list for storing column indices of the active row */ 164 nlnk = n + 1; 165 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 166 167 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 168 169 /* initial FreeSpace size is f*(ai[n]+1) */ 170 f = info->fill; 171 if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 172 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 173 current_space = free_space; 174 175 for (i = 0; i < n; i++) { 176 /* copy previous fill into linked list */ 177 nzi = 0; 178 nnz = ai[r[i] + 1] - ai[r[i]]; 179 ajtmp = aj + ai[r[i]]; 180 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 181 nzi += nlnk; 182 183 /* add pivot rows into linked list */ 184 row = lnk[n]; 185 while (row < i) { 186 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 187 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 188 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 189 nzi += nlnk; 190 row = lnk[row]; 191 } 192 bi[i + 1] = bi[i] + nzi; 193 im[i] = nzi; 194 195 /* mark bdiag */ 196 nzbd = 0; 197 nnz = nzi; 198 k = lnk[n]; 199 while (nnz-- && k < i) { 200 nzbd++; 201 k = lnk[k]; 202 } 203 bdiag[i] = bi[i] + nzbd; 204 205 /* if free space is not available, make more free space */ 206 if (current_space->local_remaining < nzi) { 207 nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */ 208 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 209 reallocs++; 210 } 211 212 /* copy data into free space, then initialize lnk */ 213 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 214 215 bi_ptr[i] = current_space->array; 216 current_space->array += nzi; 217 current_space->local_used += nzi; 218 current_space->local_remaining -= nzi; 219 } 220 #if defined(PETSC_USE_INFO) 221 if (ai[n] != 0) { 222 PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 223 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 224 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 225 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 226 PetscCall(PetscInfo(A, "for best performance.\n")); 227 } else { 228 PetscCall(PetscInfo(A, "Empty matrix\n")); 229 } 230 #endif 231 232 PetscCall(ISRestoreIndices(isrow, &r)); 233 PetscCall(ISRestoreIndices(isicol, &ic)); 234 235 /* destroy list of free space and other temporary array(s) */ 236 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 237 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 238 PetscCall(PetscLLDestroy(lnk, lnkbt)); 239 PetscCall(PetscFree2(bi_ptr, im)); 240 241 /* put together the new matrix */ 242 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 243 b = (Mat_SeqAIJ *)(B)->data; 244 245 b->free_a = PETSC_TRUE; 246 b->free_ij = PETSC_TRUE; 247 b->singlemalloc = PETSC_FALSE; 248 249 PetscCall(PetscMalloc1(bi[n] + 1, &b->a)); 250 b->j = bj; 251 b->i = bi; 252 b->diag = bdiag; 253 b->ilen = NULL; 254 b->imax = NULL; 255 b->row = isrow; 256 b->col = iscol; 257 PetscCall(PetscObjectReference((PetscObject)isrow)); 258 PetscCall(PetscObjectReference((PetscObject)iscol)); 259 b->icol = isicol; 260 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 261 262 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 263 b->maxnz = b->nz = bi[n]; 264 265 (B)->factortype = MAT_FACTOR_LU; 266 (B)->info.factor_mallocs = reallocs; 267 (B)->info.fill_ratio_given = f; 268 269 if (ai[n]) { 270 (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 271 } else { 272 (B)->info.fill_ratio_needed = 0.0; 273 } 274 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 275 if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 276 PetscFunctionReturn(0); 277 } 278 279 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) { 280 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 281 IS isicol; 282 const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp; 283 PetscInt i, n = A->rmap->n; 284 PetscInt *bi, *bj; 285 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 286 PetscReal f; 287 PetscInt nlnk, *lnk, k, **bi_ptr; 288 PetscFreeSpaceList free_space = NULL, current_space = NULL; 289 PetscBT lnkbt; 290 PetscBool missing; 291 292 PetscFunctionBegin; 293 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 294 PetscCall(MatMissingDiagonal(A, &missing, &i)); 295 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 296 297 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 298 PetscCall(ISGetIndices(isrow, &r)); 299 PetscCall(ISGetIndices(isicol, &ic)); 300 301 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 302 PetscCall(PetscMalloc1(n + 1, &bi)); 303 PetscCall(PetscMalloc1(n + 1, &bdiag)); 304 bi[0] = bdiag[0] = 0; 305 306 /* linked list for storing column indices of the active row */ 307 nlnk = n + 1; 308 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 309 310 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 311 312 /* initial FreeSpace size is f*(ai[n]+1) */ 313 f = info->fill; 314 if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 315 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 316 current_space = free_space; 317 318 for (i = 0; i < n; i++) { 319 /* copy previous fill into linked list */ 320 nzi = 0; 321 nnz = ai[r[i] + 1] - ai[r[i]]; 322 ajtmp = aj + ai[r[i]]; 323 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 324 nzi += nlnk; 325 326 /* add pivot rows into linked list */ 327 row = lnk[n]; 328 while (row < i) { 329 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 330 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 331 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 332 nzi += nlnk; 333 row = lnk[row]; 334 } 335 bi[i + 1] = bi[i] + nzi; 336 im[i] = nzi; 337 338 /* mark bdiag */ 339 nzbd = 0; 340 nnz = nzi; 341 k = lnk[n]; 342 while (nnz-- && k < i) { 343 nzbd++; 344 k = lnk[k]; 345 } 346 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 347 348 /* if free space is not available, make more free space */ 349 if (current_space->local_remaining < nzi) { 350 /* estimated additional space needed */ 351 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi)); 352 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 353 reallocs++; 354 } 355 356 /* copy data into free space, then initialize lnk */ 357 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 358 359 bi_ptr[i] = current_space->array; 360 current_space->array += nzi; 361 current_space->local_used += nzi; 362 current_space->local_remaining -= nzi; 363 } 364 365 PetscCall(ISRestoreIndices(isrow, &r)); 366 PetscCall(ISRestoreIndices(isicol, &ic)); 367 368 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 369 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 370 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 371 PetscCall(PetscLLDestroy(lnk, lnkbt)); 372 PetscCall(PetscFree2(bi_ptr, im)); 373 374 /* put together the new matrix */ 375 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 376 b = (Mat_SeqAIJ *)(B)->data; 377 378 b->free_a = PETSC_TRUE; 379 b->free_ij = PETSC_TRUE; 380 b->singlemalloc = PETSC_FALSE; 381 382 PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a)); 383 384 b->j = bj; 385 b->i = bi; 386 b->diag = bdiag; 387 b->ilen = NULL; 388 b->imax = NULL; 389 b->row = isrow; 390 b->col = iscol; 391 PetscCall(PetscObjectReference((PetscObject)isrow)); 392 PetscCall(PetscObjectReference((PetscObject)iscol)); 393 b->icol = isicol; 394 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 395 396 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 397 b->maxnz = b->nz = bdiag[0] + 1; 398 399 B->factortype = MAT_FACTOR_LU; 400 B->info.factor_mallocs = reallocs; 401 B->info.fill_ratio_given = f; 402 403 if (ai[n]) { 404 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 405 } else { 406 B->info.fill_ratio_needed = 0.0; 407 } 408 #if defined(PETSC_USE_INFO) 409 if (ai[n] != 0) { 410 PetscReal af = B->info.fill_ratio_needed; 411 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 412 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 413 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 414 PetscCall(PetscInfo(A, "for best performance.\n")); 415 } else { 416 PetscCall(PetscInfo(A, "Empty matrix\n")); 417 } 418 #endif 419 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 420 if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 421 PetscCall(MatSeqAIJCheckInode_FactorLU(B)); 422 PetscFunctionReturn(0); 423 } 424 425 /* 426 Trouble in factorization, should we dump the original matrix? 427 */ 428 PetscErrorCode MatFactorDumpMatrix(Mat A) { 429 PetscBool flg = PETSC_FALSE; 430 431 PetscFunctionBegin; 432 PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL)); 433 if (flg) { 434 PetscViewer viewer; 435 char filename[PETSC_MAX_PATH_LEN]; 436 437 PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank)); 438 PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer)); 439 PetscCall(MatView(A, viewer)); 440 PetscCall(PetscViewerDestroy(&viewer)); 441 } 442 PetscFunctionReturn(0); 443 } 444 445 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) { 446 Mat C = B; 447 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 448 IS isrow = b->row, isicol = b->icol; 449 const PetscInt *r, *ic, *ics; 450 const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag; 451 PetscInt i, j, k, nz, nzL, row, *pj; 452 const PetscInt *ajtmp, *bjtmp; 453 MatScalar *rtmp, *pc, multiplier, *pv; 454 const MatScalar *aa = a->a, *v; 455 PetscBool row_identity, col_identity; 456 FactorShiftCtx sctx; 457 const PetscInt *ddiag; 458 PetscReal rs; 459 MatScalar d; 460 461 PetscFunctionBegin; 462 /* MatPivotSetUp(): initialize shift context sctx */ 463 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 464 465 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 466 ddiag = a->diag; 467 sctx.shift_top = info->zeropivot; 468 for (i = 0; i < n; i++) { 469 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 470 d = (aa)[ddiag[i]]; 471 rs = -PetscAbsScalar(d) - PetscRealPart(d); 472 v = aa + ai[i]; 473 nz = ai[i + 1] - ai[i]; 474 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 475 if (rs > sctx.shift_top) sctx.shift_top = rs; 476 } 477 sctx.shift_top *= 1.1; 478 sctx.nshift_max = 5; 479 sctx.shift_lo = 0.; 480 sctx.shift_hi = 1.; 481 } 482 483 PetscCall(ISGetIndices(isrow, &r)); 484 PetscCall(ISGetIndices(isicol, &ic)); 485 PetscCall(PetscMalloc1(n + 1, &rtmp)); 486 ics = ic; 487 488 do { 489 sctx.newshift = PETSC_FALSE; 490 for (i = 0; i < n; i++) { 491 /* zero rtmp */ 492 /* L part */ 493 nz = bi[i + 1] - bi[i]; 494 bjtmp = bj + bi[i]; 495 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 496 497 /* U part */ 498 nz = bdiag[i] - bdiag[i + 1]; 499 bjtmp = bj + bdiag[i + 1] + 1; 500 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 501 502 /* load in initial (unfactored row) */ 503 nz = ai[r[i] + 1] - ai[r[i]]; 504 ajtmp = aj + ai[r[i]]; 505 v = aa + ai[r[i]]; 506 for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 507 /* ZeropivotApply() */ 508 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 509 510 /* elimination */ 511 bjtmp = bj + bi[i]; 512 row = *bjtmp++; 513 nzL = bi[i + 1] - bi[i]; 514 for (k = 0; k < nzL; k++) { 515 pc = rtmp + row; 516 if (*pc != 0.0) { 517 pv = b->a + bdiag[row]; 518 multiplier = *pc * (*pv); 519 *pc = multiplier; 520 521 pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 522 pv = b->a + bdiag[row + 1] + 1; 523 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */ 524 525 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 526 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 527 } 528 row = *bjtmp++; 529 } 530 531 /* finished row so stick it into b->a */ 532 rs = 0.0; 533 /* L part */ 534 pv = b->a + bi[i]; 535 pj = b->j + bi[i]; 536 nz = bi[i + 1] - bi[i]; 537 for (j = 0; j < nz; j++) { 538 pv[j] = rtmp[pj[j]]; 539 rs += PetscAbsScalar(pv[j]); 540 } 541 542 /* U part */ 543 pv = b->a + bdiag[i + 1] + 1; 544 pj = b->j + bdiag[i + 1] + 1; 545 nz = bdiag[i] - bdiag[i + 1] - 1; 546 for (j = 0; j < nz; j++) { 547 pv[j] = rtmp[pj[j]]; 548 rs += PetscAbsScalar(pv[j]); 549 } 550 551 sctx.rs = rs; 552 sctx.pv = rtmp[i]; 553 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 554 if (sctx.newshift) break; /* break for-loop */ 555 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 556 557 /* Mark diagonal and invert diagonal for simpler triangular solves */ 558 pv = b->a + bdiag[i]; 559 *pv = 1.0 / rtmp[i]; 560 561 } /* endof for (i=0; i<n; i++) { */ 562 563 /* MatPivotRefine() */ 564 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 565 /* 566 * if no shift in this attempt & shifting & started shifting & can refine, 567 * then try lower shift 568 */ 569 sctx.shift_hi = sctx.shift_fraction; 570 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 571 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 572 sctx.newshift = PETSC_TRUE; 573 sctx.nshift++; 574 } 575 } while (sctx.newshift); 576 577 PetscCall(PetscFree(rtmp)); 578 PetscCall(ISRestoreIndices(isicol, &ic)); 579 PetscCall(ISRestoreIndices(isrow, &r)); 580 581 PetscCall(ISIdentity(isrow, &row_identity)); 582 PetscCall(ISIdentity(isicol, &col_identity)); 583 if (b->inode.size) { 584 C->ops->solve = MatSolve_SeqAIJ_Inode; 585 } else if (row_identity && col_identity) { 586 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 587 } else { 588 C->ops->solve = MatSolve_SeqAIJ; 589 } 590 C->ops->solveadd = MatSolveAdd_SeqAIJ; 591 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 592 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 593 C->ops->matsolve = MatMatSolve_SeqAIJ; 594 C->assembled = PETSC_TRUE; 595 C->preallocated = PETSC_TRUE; 596 597 PetscCall(PetscLogFlops(C->cmap->n)); 598 599 /* MatShiftView(A,info,&sctx) */ 600 if (sctx.nshift) { 601 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 602 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 603 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 604 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 605 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 606 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 607 } 608 } 609 PetscFunctionReturn(0); 610 } 611 612 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) { 613 Mat C = B; 614 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 615 IS isrow = b->row, isicol = b->icol; 616 const PetscInt *r, *ic, *ics; 617 PetscInt nz, row, i, j, n = A->rmap->n, diag; 618 const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 619 const PetscInt *ajtmp, *bjtmp, *diag_offset = b->diag, *pj; 620 MatScalar *pv, *rtmp, *pc, multiplier, d; 621 const MatScalar *v, *aa = a->a; 622 PetscReal rs = 0.0; 623 FactorShiftCtx sctx; 624 const PetscInt *ddiag; 625 PetscBool row_identity, col_identity; 626 627 PetscFunctionBegin; 628 /* MatPivotSetUp(): initialize shift context sctx */ 629 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 630 631 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 632 ddiag = a->diag; 633 sctx.shift_top = info->zeropivot; 634 for (i = 0; i < n; i++) { 635 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 636 d = (aa)[ddiag[i]]; 637 rs = -PetscAbsScalar(d) - PetscRealPart(d); 638 v = aa + ai[i]; 639 nz = ai[i + 1] - ai[i]; 640 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 641 if (rs > sctx.shift_top) sctx.shift_top = rs; 642 } 643 sctx.shift_top *= 1.1; 644 sctx.nshift_max = 5; 645 sctx.shift_lo = 0.; 646 sctx.shift_hi = 1.; 647 } 648 649 PetscCall(ISGetIndices(isrow, &r)); 650 PetscCall(ISGetIndices(isicol, &ic)); 651 PetscCall(PetscMalloc1(n + 1, &rtmp)); 652 ics = ic; 653 654 do { 655 sctx.newshift = PETSC_FALSE; 656 for (i = 0; i < n; i++) { 657 nz = bi[i + 1] - bi[i]; 658 bjtmp = bj + bi[i]; 659 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 660 661 /* load in initial (unfactored row) */ 662 nz = ai[r[i] + 1] - ai[r[i]]; 663 ajtmp = aj + ai[r[i]]; 664 v = aa + ai[r[i]]; 665 for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 666 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 667 668 row = *bjtmp++; 669 while (row < i) { 670 pc = rtmp + row; 671 if (*pc != 0.0) { 672 pv = b->a + diag_offset[row]; 673 pj = b->j + diag_offset[row] + 1; 674 multiplier = *pc / *pv++; 675 *pc = multiplier; 676 nz = bi[row + 1] - diag_offset[row] - 1; 677 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 678 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 679 } 680 row = *bjtmp++; 681 } 682 /* finished row so stick it into b->a */ 683 pv = b->a + bi[i]; 684 pj = b->j + bi[i]; 685 nz = bi[i + 1] - bi[i]; 686 diag = diag_offset[i] - bi[i]; 687 rs = 0.0; 688 for (j = 0; j < nz; j++) { 689 pv[j] = rtmp[pj[j]]; 690 rs += PetscAbsScalar(pv[j]); 691 } 692 rs -= PetscAbsScalar(pv[diag]); 693 694 sctx.rs = rs; 695 sctx.pv = pv[diag]; 696 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 697 if (sctx.newshift) break; 698 pv[diag] = sctx.pv; 699 } 700 701 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 702 /* 703 * if no shift in this attempt & shifting & started shifting & can refine, 704 * then try lower shift 705 */ 706 sctx.shift_hi = sctx.shift_fraction; 707 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 708 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 709 sctx.newshift = PETSC_TRUE; 710 sctx.nshift++; 711 } 712 } while (sctx.newshift); 713 714 /* invert diagonal entries for simpler triangular solves */ 715 for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]]; 716 PetscCall(PetscFree(rtmp)); 717 PetscCall(ISRestoreIndices(isicol, &ic)); 718 PetscCall(ISRestoreIndices(isrow, &r)); 719 720 PetscCall(ISIdentity(isrow, &row_identity)); 721 PetscCall(ISIdentity(isicol, &col_identity)); 722 if (row_identity && col_identity) { 723 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 724 } else { 725 C->ops->solve = MatSolve_SeqAIJ_inplace; 726 } 727 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 728 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 729 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 730 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 731 732 C->assembled = PETSC_TRUE; 733 C->preallocated = PETSC_TRUE; 734 735 PetscCall(PetscLogFlops(C->cmap->n)); 736 if (sctx.nshift) { 737 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 738 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 739 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 740 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 741 } 742 } 743 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 744 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 745 746 PetscCall(MatSeqAIJCheckInode(C)); 747 PetscFunctionReturn(0); 748 } 749 750 /* 751 This routine implements inplace ILU(0) with row or/and column permutations. 752 Input: 753 A - original matrix 754 Output; 755 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 756 a->j (col index) is permuted by the inverse of colperm, then sorted 757 a->a reordered accordingly with a->j 758 a->diag (ptr to diagonal elements) is updated. 759 */ 760 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info) { 761 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 762 IS isrow = a->row, isicol = a->icol; 763 const PetscInt *r, *ic, *ics; 764 PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j; 765 PetscInt *ajtmp, nz, row; 766 PetscInt *diag = a->diag, nbdiag, *pj; 767 PetscScalar *rtmp, *pc, multiplier, d; 768 MatScalar *pv, *v; 769 PetscReal rs; 770 FactorShiftCtx sctx; 771 const MatScalar *aa = a->a, *vtmp; 772 773 PetscFunctionBegin; 774 PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address"); 775 776 /* MatPivotSetUp(): initialize shift context sctx */ 777 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 778 779 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 780 const PetscInt *ddiag = a->diag; 781 sctx.shift_top = info->zeropivot; 782 for (i = 0; i < n; i++) { 783 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 784 d = (aa)[ddiag[i]]; 785 rs = -PetscAbsScalar(d) - PetscRealPart(d); 786 vtmp = aa + ai[i]; 787 nz = ai[i + 1] - ai[i]; 788 for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]); 789 if (rs > sctx.shift_top) sctx.shift_top = rs; 790 } 791 sctx.shift_top *= 1.1; 792 sctx.nshift_max = 5; 793 sctx.shift_lo = 0.; 794 sctx.shift_hi = 1.; 795 } 796 797 PetscCall(ISGetIndices(isrow, &r)); 798 PetscCall(ISGetIndices(isicol, &ic)); 799 PetscCall(PetscMalloc1(n + 1, &rtmp)); 800 PetscCall(PetscArrayzero(rtmp, n + 1)); 801 ics = ic; 802 803 #if defined(MV) 804 sctx.shift_top = 0.; 805 sctx.nshift_max = 0; 806 sctx.shift_lo = 0.; 807 sctx.shift_hi = 0.; 808 sctx.shift_fraction = 0.; 809 810 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 811 sctx.shift_top = 0.; 812 for (i = 0; i < n; i++) { 813 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 814 d = (a->a)[diag[i]]; 815 rs = -PetscAbsScalar(d) - PetscRealPart(d); 816 v = a->a + ai[i]; 817 nz = ai[i + 1] - ai[i]; 818 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 819 if (rs > sctx.shift_top) sctx.shift_top = rs; 820 } 821 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 822 sctx.shift_top *= 1.1; 823 sctx.nshift_max = 5; 824 sctx.shift_lo = 0.; 825 sctx.shift_hi = 1.; 826 } 827 828 sctx.shift_amount = 0.; 829 sctx.nshift = 0; 830 #endif 831 832 do { 833 sctx.newshift = PETSC_FALSE; 834 for (i = 0; i < n; i++) { 835 /* load in initial unfactored row */ 836 nz = ai[r[i] + 1] - ai[r[i]]; 837 ajtmp = aj + ai[r[i]]; 838 v = a->a + ai[r[i]]; 839 /* sort permuted ajtmp and values v accordingly */ 840 for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]]; 841 PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v)); 842 843 diag[r[i]] = ai[r[i]]; 844 for (j = 0; j < nz; j++) { 845 rtmp[ajtmp[j]] = v[j]; 846 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 847 } 848 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 849 850 row = *ajtmp++; 851 while (row < i) { 852 pc = rtmp + row; 853 if (*pc != 0.0) { 854 pv = a->a + diag[r[row]]; 855 pj = aj + diag[r[row]] + 1; 856 857 multiplier = *pc / *pv++; 858 *pc = multiplier; 859 nz = ai[r[row] + 1] - diag[r[row]] - 1; 860 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 861 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 862 } 863 row = *ajtmp++; 864 } 865 /* finished row so overwrite it onto a->a */ 866 pv = a->a + ai[r[i]]; 867 pj = aj + ai[r[i]]; 868 nz = ai[r[i] + 1] - ai[r[i]]; 869 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 870 871 rs = 0.0; 872 for (j = 0; j < nz; j++) { 873 pv[j] = rtmp[pj[j]]; 874 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 875 } 876 877 sctx.rs = rs; 878 sctx.pv = pv[nbdiag]; 879 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 880 if (sctx.newshift) break; 881 pv[nbdiag] = sctx.pv; 882 } 883 884 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 885 /* 886 * if no shift in this attempt & shifting & started shifting & can refine, 887 * then try lower shift 888 */ 889 sctx.shift_hi = sctx.shift_fraction; 890 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 891 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 892 sctx.newshift = PETSC_TRUE; 893 sctx.nshift++; 894 } 895 } while (sctx.newshift); 896 897 /* invert diagonal entries for simpler triangular solves */ 898 for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]]; 899 900 PetscCall(PetscFree(rtmp)); 901 PetscCall(ISRestoreIndices(isicol, &ic)); 902 PetscCall(ISRestoreIndices(isrow, &r)); 903 904 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 905 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 906 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 907 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 908 909 A->assembled = PETSC_TRUE; 910 A->preallocated = PETSC_TRUE; 911 912 PetscCall(PetscLogFlops(A->cmap->n)); 913 if (sctx.nshift) { 914 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 915 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 916 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 917 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 918 } 919 } 920 PetscFunctionReturn(0); 921 } 922 923 /* ----------------------------------------------------------- */ 924 PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info) { 925 Mat C; 926 927 PetscFunctionBegin; 928 PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C)); 929 PetscCall(MatLUFactorSymbolic(C, A, row, col, info)); 930 PetscCall(MatLUFactorNumeric(C, A, info)); 931 932 A->ops->solve = C->ops->solve; 933 A->ops->solvetranspose = C->ops->solvetranspose; 934 935 PetscCall(MatHeaderMerge(A, &C)); 936 PetscFunctionReturn(0); 937 } 938 /* ----------------------------------------------------------- */ 939 940 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) { 941 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 942 IS iscol = a->col, isrow = a->row; 943 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 944 PetscInt nz; 945 const PetscInt *rout, *cout, *r, *c; 946 PetscScalar *x, *tmp, *tmps, sum; 947 const PetscScalar *b; 948 const MatScalar *aa = a->a, *v; 949 950 PetscFunctionBegin; 951 if (!n) PetscFunctionReturn(0); 952 953 PetscCall(VecGetArrayRead(bb, &b)); 954 PetscCall(VecGetArrayWrite(xx, &x)); 955 tmp = a->solve_work; 956 957 PetscCall(ISGetIndices(isrow, &rout)); 958 r = rout; 959 PetscCall(ISGetIndices(iscol, &cout)); 960 c = cout + (n - 1); 961 962 /* forward solve the lower triangular */ 963 tmp[0] = b[*r++]; 964 tmps = tmp; 965 for (i = 1; i < n; i++) { 966 v = aa + ai[i]; 967 vi = aj + ai[i]; 968 nz = a->diag[i] - ai[i]; 969 sum = b[*r++]; 970 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 971 tmp[i] = sum; 972 } 973 974 /* backward solve the upper triangular */ 975 for (i = n - 1; i >= 0; i--) { 976 v = aa + a->diag[i] + 1; 977 vi = aj + a->diag[i] + 1; 978 nz = ai[i + 1] - a->diag[i] - 1; 979 sum = tmp[i]; 980 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 981 x[*c--] = tmp[i] = sum * aa[a->diag[i]]; 982 } 983 984 PetscCall(ISRestoreIndices(isrow, &rout)); 985 PetscCall(ISRestoreIndices(iscol, &cout)); 986 PetscCall(VecRestoreArrayRead(bb, &b)); 987 PetscCall(VecRestoreArrayWrite(xx, &x)); 988 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 989 PetscFunctionReturn(0); 990 } 991 992 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X) { 993 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 994 IS iscol = a->col, isrow = a->row; 995 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 996 PetscInt nz, neq, ldb, ldx; 997 const PetscInt *rout, *cout, *r, *c; 998 PetscScalar *x, *tmp = a->solve_work, *tmps, sum; 999 const PetscScalar *b, *aa = a->a, *v; 1000 PetscBool isdense; 1001 1002 PetscFunctionBegin; 1003 if (!n) PetscFunctionReturn(0); 1004 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 1005 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1006 if (X != B) { 1007 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 1008 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1009 } 1010 PetscCall(MatDenseGetArrayRead(B, &b)); 1011 PetscCall(MatDenseGetLDA(B, &ldb)); 1012 PetscCall(MatDenseGetArray(X, &x)); 1013 PetscCall(MatDenseGetLDA(X, &ldx)); 1014 PetscCall(ISGetIndices(isrow, &rout)); 1015 r = rout; 1016 PetscCall(ISGetIndices(iscol, &cout)); 1017 c = cout; 1018 for (neq = 0; neq < B->cmap->n; neq++) { 1019 /* forward solve the lower triangular */ 1020 tmp[0] = b[r[0]]; 1021 tmps = tmp; 1022 for (i = 1; i < n; i++) { 1023 v = aa + ai[i]; 1024 vi = aj + ai[i]; 1025 nz = a->diag[i] - ai[i]; 1026 sum = b[r[i]]; 1027 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1028 tmp[i] = sum; 1029 } 1030 /* backward solve the upper triangular */ 1031 for (i = n - 1; i >= 0; i--) { 1032 v = aa + a->diag[i] + 1; 1033 vi = aj + a->diag[i] + 1; 1034 nz = ai[i + 1] - a->diag[i] - 1; 1035 sum = tmp[i]; 1036 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1037 x[c[i]] = tmp[i] = sum * aa[a->diag[i]]; 1038 } 1039 b += ldb; 1040 x += ldx; 1041 } 1042 PetscCall(ISRestoreIndices(isrow, &rout)); 1043 PetscCall(ISRestoreIndices(iscol, &cout)); 1044 PetscCall(MatDenseRestoreArrayRead(B, &b)); 1045 PetscCall(MatDenseRestoreArray(X, &x)); 1046 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 1047 PetscFunctionReturn(0); 1048 } 1049 1050 PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X) { 1051 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1052 IS iscol = a->col, isrow = a->row; 1053 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 1054 PetscInt nz, neq, ldb, ldx; 1055 const PetscInt *rout, *cout, *r, *c; 1056 PetscScalar *x, *tmp = a->solve_work, sum; 1057 const PetscScalar *b, *aa = a->a, *v; 1058 PetscBool isdense; 1059 1060 PetscFunctionBegin; 1061 if (!n) PetscFunctionReturn(0); 1062 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 1063 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1064 if (X != B) { 1065 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 1066 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1067 } 1068 PetscCall(MatDenseGetArrayRead(B, &b)); 1069 PetscCall(MatDenseGetLDA(B, &ldb)); 1070 PetscCall(MatDenseGetArray(X, &x)); 1071 PetscCall(MatDenseGetLDA(X, &ldx)); 1072 PetscCall(ISGetIndices(isrow, &rout)); 1073 r = rout; 1074 PetscCall(ISGetIndices(iscol, &cout)); 1075 c = cout; 1076 for (neq = 0; neq < B->cmap->n; neq++) { 1077 /* forward solve the lower triangular */ 1078 tmp[0] = b[r[0]]; 1079 v = aa; 1080 vi = aj; 1081 for (i = 1; i < n; i++) { 1082 nz = ai[i + 1] - ai[i]; 1083 sum = b[r[i]]; 1084 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 1085 tmp[i] = sum; 1086 v += nz; 1087 vi += nz; 1088 } 1089 /* backward solve the upper triangular */ 1090 for (i = n - 1; i >= 0; i--) { 1091 v = aa + adiag[i + 1] + 1; 1092 vi = aj + adiag[i + 1] + 1; 1093 nz = adiag[i] - adiag[i + 1] - 1; 1094 sum = tmp[i]; 1095 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 1096 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 1097 } 1098 b += ldb; 1099 x += ldx; 1100 } 1101 PetscCall(ISRestoreIndices(isrow, &rout)); 1102 PetscCall(ISRestoreIndices(iscol, &cout)); 1103 PetscCall(MatDenseRestoreArrayRead(B, &b)); 1104 PetscCall(MatDenseRestoreArray(X, &x)); 1105 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 1106 PetscFunctionReturn(0); 1107 } 1108 1109 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx) { 1110 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1111 IS iscol = a->col, isrow = a->row; 1112 const PetscInt *r, *c, *rout, *cout; 1113 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 1114 PetscInt nz, row; 1115 PetscScalar *x, *tmp, *tmps, sum; 1116 const PetscScalar *b; 1117 const MatScalar *aa = a->a, *v; 1118 1119 PetscFunctionBegin; 1120 if (!n) PetscFunctionReturn(0); 1121 1122 PetscCall(VecGetArrayRead(bb, &b)); 1123 PetscCall(VecGetArrayWrite(xx, &x)); 1124 tmp = a->solve_work; 1125 1126 PetscCall(ISGetIndices(isrow, &rout)); 1127 r = rout; 1128 PetscCall(ISGetIndices(iscol, &cout)); 1129 c = cout + (n - 1); 1130 1131 /* forward solve the lower triangular */ 1132 tmp[0] = b[*r++]; 1133 tmps = tmp; 1134 for (row = 1; row < n; row++) { 1135 i = rout[row]; /* permuted row */ 1136 v = aa + ai[i]; 1137 vi = aj + ai[i]; 1138 nz = a->diag[i] - ai[i]; 1139 sum = b[*r++]; 1140 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1141 tmp[row] = sum; 1142 } 1143 1144 /* backward solve the upper triangular */ 1145 for (row = n - 1; row >= 0; row--) { 1146 i = rout[row]; /* permuted row */ 1147 v = aa + a->diag[i] + 1; 1148 vi = aj + a->diag[i] + 1; 1149 nz = ai[i + 1] - a->diag[i] - 1; 1150 sum = tmp[row]; 1151 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1152 x[*c--] = tmp[row] = sum * aa[a->diag[i]]; 1153 } 1154 1155 PetscCall(ISRestoreIndices(isrow, &rout)); 1156 PetscCall(ISRestoreIndices(iscol, &cout)); 1157 PetscCall(VecRestoreArrayRead(bb, &b)); 1158 PetscCall(VecRestoreArrayWrite(xx, &x)); 1159 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1160 PetscFunctionReturn(0); 1161 } 1162 1163 /* ----------------------------------------------------------- */ 1164 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1165 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) { 1166 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1167 PetscInt n = A->rmap->n; 1168 const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag; 1169 PetscScalar *x; 1170 const PetscScalar *b; 1171 const MatScalar *aa = a->a; 1172 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1173 PetscInt adiag_i, i, nz, ai_i; 1174 const PetscInt *vi; 1175 const MatScalar *v; 1176 PetscScalar sum; 1177 #endif 1178 1179 PetscFunctionBegin; 1180 if (!n) PetscFunctionReturn(0); 1181 1182 PetscCall(VecGetArrayRead(bb, &b)); 1183 PetscCall(VecGetArrayWrite(xx, &x)); 1184 1185 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1186 fortransolveaij_(&n, x, ai, aj, adiag, aa, b); 1187 #else 1188 /* forward solve the lower triangular */ 1189 x[0] = b[0]; 1190 for (i = 1; i < n; i++) { 1191 ai_i = ai[i]; 1192 v = aa + ai_i; 1193 vi = aj + ai_i; 1194 nz = adiag[i] - ai_i; 1195 sum = b[i]; 1196 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1197 x[i] = sum; 1198 } 1199 1200 /* backward solve the upper triangular */ 1201 for (i = n - 1; i >= 0; i--) { 1202 adiag_i = adiag[i]; 1203 v = aa + adiag_i + 1; 1204 vi = aj + adiag_i + 1; 1205 nz = ai[i + 1] - adiag_i - 1; 1206 sum = x[i]; 1207 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1208 x[i] = sum * aa[adiag_i]; 1209 } 1210 #endif 1211 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1212 PetscCall(VecRestoreArrayRead(bb, &b)); 1213 PetscCall(VecRestoreArrayWrite(xx, &x)); 1214 PetscFunctionReturn(0); 1215 } 1216 1217 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx) { 1218 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1219 IS iscol = a->col, isrow = a->row; 1220 PetscInt i, n = A->rmap->n, j; 1221 PetscInt nz; 1222 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j; 1223 PetscScalar *x, *tmp, sum; 1224 const PetscScalar *b; 1225 const MatScalar *aa = a->a, *v; 1226 1227 PetscFunctionBegin; 1228 if (yy != xx) PetscCall(VecCopy(yy, xx)); 1229 1230 PetscCall(VecGetArrayRead(bb, &b)); 1231 PetscCall(VecGetArray(xx, &x)); 1232 tmp = a->solve_work; 1233 1234 PetscCall(ISGetIndices(isrow, &rout)); 1235 r = rout; 1236 PetscCall(ISGetIndices(iscol, &cout)); 1237 c = cout + (n - 1); 1238 1239 /* forward solve the lower triangular */ 1240 tmp[0] = b[*r++]; 1241 for (i = 1; i < n; i++) { 1242 v = aa + ai[i]; 1243 vi = aj + ai[i]; 1244 nz = a->diag[i] - ai[i]; 1245 sum = b[*r++]; 1246 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1247 tmp[i] = sum; 1248 } 1249 1250 /* backward solve the upper triangular */ 1251 for (i = n - 1; i >= 0; i--) { 1252 v = aa + a->diag[i] + 1; 1253 vi = aj + a->diag[i] + 1; 1254 nz = ai[i + 1] - a->diag[i] - 1; 1255 sum = tmp[i]; 1256 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1257 tmp[i] = sum * aa[a->diag[i]]; 1258 x[*c--] += tmp[i]; 1259 } 1260 1261 PetscCall(ISRestoreIndices(isrow, &rout)); 1262 PetscCall(ISRestoreIndices(iscol, &cout)); 1263 PetscCall(VecRestoreArrayRead(bb, &b)); 1264 PetscCall(VecRestoreArray(xx, &x)); 1265 PetscCall(PetscLogFlops(2.0 * a->nz)); 1266 PetscFunctionReturn(0); 1267 } 1268 1269 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx) { 1270 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1271 IS iscol = a->col, isrow = a->row; 1272 PetscInt i, n = A->rmap->n, j; 1273 PetscInt nz; 1274 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 1275 PetscScalar *x, *tmp, sum; 1276 const PetscScalar *b; 1277 const MatScalar *aa = a->a, *v; 1278 1279 PetscFunctionBegin; 1280 if (yy != xx) PetscCall(VecCopy(yy, xx)); 1281 1282 PetscCall(VecGetArrayRead(bb, &b)); 1283 PetscCall(VecGetArray(xx, &x)); 1284 tmp = a->solve_work; 1285 1286 PetscCall(ISGetIndices(isrow, &rout)); 1287 r = rout; 1288 PetscCall(ISGetIndices(iscol, &cout)); 1289 c = cout; 1290 1291 /* forward solve the lower triangular */ 1292 tmp[0] = b[r[0]]; 1293 v = aa; 1294 vi = aj; 1295 for (i = 1; i < n; i++) { 1296 nz = ai[i + 1] - ai[i]; 1297 sum = b[r[i]]; 1298 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1299 tmp[i] = sum; 1300 v += nz; 1301 vi += nz; 1302 } 1303 1304 /* backward solve the upper triangular */ 1305 v = aa + adiag[n - 1]; 1306 vi = aj + adiag[n - 1]; 1307 for (i = n - 1; i >= 0; i--) { 1308 nz = adiag[i] - adiag[i + 1] - 1; 1309 sum = tmp[i]; 1310 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1311 tmp[i] = sum * v[nz]; 1312 x[c[i]] += tmp[i]; 1313 v += nz + 1; 1314 vi += nz + 1; 1315 } 1316 1317 PetscCall(ISRestoreIndices(isrow, &rout)); 1318 PetscCall(ISRestoreIndices(iscol, &cout)); 1319 PetscCall(VecRestoreArrayRead(bb, &b)); 1320 PetscCall(VecRestoreArray(xx, &x)); 1321 PetscCall(PetscLogFlops(2.0 * a->nz)); 1322 PetscFunctionReturn(0); 1323 } 1324 1325 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) { 1326 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1327 IS iscol = a->col, isrow = a->row; 1328 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 1329 PetscInt i, n = A->rmap->n, j; 1330 PetscInt nz; 1331 PetscScalar *x, *tmp, s1; 1332 const MatScalar *aa = a->a, *v; 1333 const PetscScalar *b; 1334 1335 PetscFunctionBegin; 1336 PetscCall(VecGetArrayRead(bb, &b)); 1337 PetscCall(VecGetArrayWrite(xx, &x)); 1338 tmp = a->solve_work; 1339 1340 PetscCall(ISGetIndices(isrow, &rout)); 1341 r = rout; 1342 PetscCall(ISGetIndices(iscol, &cout)); 1343 c = cout; 1344 1345 /* copy the b into temp work space according to permutation */ 1346 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1347 1348 /* forward solve the U^T */ 1349 for (i = 0; i < n; i++) { 1350 v = aa + diag[i]; 1351 vi = aj + diag[i] + 1; 1352 nz = ai[i + 1] - diag[i] - 1; 1353 s1 = tmp[i]; 1354 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1355 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1356 tmp[i] = s1; 1357 } 1358 1359 /* backward solve the L^T */ 1360 for (i = n - 1; i >= 0; i--) { 1361 v = aa + diag[i] - 1; 1362 vi = aj + diag[i] - 1; 1363 nz = diag[i] - ai[i]; 1364 s1 = tmp[i]; 1365 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1366 } 1367 1368 /* copy tmp into x according to permutation */ 1369 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1370 1371 PetscCall(ISRestoreIndices(isrow, &rout)); 1372 PetscCall(ISRestoreIndices(iscol, &cout)); 1373 PetscCall(VecRestoreArrayRead(bb, &b)); 1374 PetscCall(VecRestoreArrayWrite(xx, &x)); 1375 1376 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1377 PetscFunctionReturn(0); 1378 } 1379 1380 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx) { 1381 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1382 IS iscol = a->col, isrow = a->row; 1383 const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1384 PetscInt i, n = A->rmap->n, j; 1385 PetscInt nz; 1386 PetscScalar *x, *tmp, s1; 1387 const MatScalar *aa = a->a, *v; 1388 const PetscScalar *b; 1389 1390 PetscFunctionBegin; 1391 PetscCall(VecGetArrayRead(bb, &b)); 1392 PetscCall(VecGetArrayWrite(xx, &x)); 1393 tmp = a->solve_work; 1394 1395 PetscCall(ISGetIndices(isrow, &rout)); 1396 r = rout; 1397 PetscCall(ISGetIndices(iscol, &cout)); 1398 c = cout; 1399 1400 /* copy the b into temp work space according to permutation */ 1401 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1402 1403 /* forward solve the U^T */ 1404 for (i = 0; i < n; i++) { 1405 v = aa + adiag[i + 1] + 1; 1406 vi = aj + adiag[i + 1] + 1; 1407 nz = adiag[i] - adiag[i + 1] - 1; 1408 s1 = tmp[i]; 1409 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1410 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1411 tmp[i] = s1; 1412 } 1413 1414 /* backward solve the L^T */ 1415 for (i = n - 1; i >= 0; i--) { 1416 v = aa + ai[i]; 1417 vi = aj + ai[i]; 1418 nz = ai[i + 1] - ai[i]; 1419 s1 = tmp[i]; 1420 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1421 } 1422 1423 /* copy tmp into x according to permutation */ 1424 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1425 1426 PetscCall(ISRestoreIndices(isrow, &rout)); 1427 PetscCall(ISRestoreIndices(iscol, &cout)); 1428 PetscCall(VecRestoreArrayRead(bb, &b)); 1429 PetscCall(VecRestoreArrayWrite(xx, &x)); 1430 1431 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1432 PetscFunctionReturn(0); 1433 } 1434 1435 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx) { 1436 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1437 IS iscol = a->col, isrow = a->row; 1438 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 1439 PetscInt i, n = A->rmap->n, j; 1440 PetscInt nz; 1441 PetscScalar *x, *tmp, s1; 1442 const MatScalar *aa = a->a, *v; 1443 const PetscScalar *b; 1444 1445 PetscFunctionBegin; 1446 if (zz != xx) PetscCall(VecCopy(zz, xx)); 1447 PetscCall(VecGetArrayRead(bb, &b)); 1448 PetscCall(VecGetArray(xx, &x)); 1449 tmp = a->solve_work; 1450 1451 PetscCall(ISGetIndices(isrow, &rout)); 1452 r = rout; 1453 PetscCall(ISGetIndices(iscol, &cout)); 1454 c = cout; 1455 1456 /* copy the b into temp work space according to permutation */ 1457 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1458 1459 /* forward solve the U^T */ 1460 for (i = 0; i < n; i++) { 1461 v = aa + diag[i]; 1462 vi = aj + diag[i] + 1; 1463 nz = ai[i + 1] - diag[i] - 1; 1464 s1 = tmp[i]; 1465 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1466 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1467 tmp[i] = s1; 1468 } 1469 1470 /* backward solve the L^T */ 1471 for (i = n - 1; i >= 0; i--) { 1472 v = aa + diag[i] - 1; 1473 vi = aj + diag[i] - 1; 1474 nz = diag[i] - ai[i]; 1475 s1 = tmp[i]; 1476 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1477 } 1478 1479 /* copy tmp into x according to permutation */ 1480 for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1481 1482 PetscCall(ISRestoreIndices(isrow, &rout)); 1483 PetscCall(ISRestoreIndices(iscol, &cout)); 1484 PetscCall(VecRestoreArrayRead(bb, &b)); 1485 PetscCall(VecRestoreArray(xx, &x)); 1486 1487 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1488 PetscFunctionReturn(0); 1489 } 1490 1491 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx) { 1492 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1493 IS iscol = a->col, isrow = a->row; 1494 const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1495 PetscInt i, n = A->rmap->n, j; 1496 PetscInt nz; 1497 PetscScalar *x, *tmp, s1; 1498 const MatScalar *aa = a->a, *v; 1499 const PetscScalar *b; 1500 1501 PetscFunctionBegin; 1502 if (zz != xx) PetscCall(VecCopy(zz, xx)); 1503 PetscCall(VecGetArrayRead(bb, &b)); 1504 PetscCall(VecGetArray(xx, &x)); 1505 tmp = a->solve_work; 1506 1507 PetscCall(ISGetIndices(isrow, &rout)); 1508 r = rout; 1509 PetscCall(ISGetIndices(iscol, &cout)); 1510 c = cout; 1511 1512 /* copy the b into temp work space according to permutation */ 1513 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1514 1515 /* forward solve the U^T */ 1516 for (i = 0; i < n; i++) { 1517 v = aa + adiag[i + 1] + 1; 1518 vi = aj + adiag[i + 1] + 1; 1519 nz = adiag[i] - adiag[i + 1] - 1; 1520 s1 = tmp[i]; 1521 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1522 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1523 tmp[i] = s1; 1524 } 1525 1526 /* backward solve the L^T */ 1527 for (i = n - 1; i >= 0; i--) { 1528 v = aa + ai[i]; 1529 vi = aj + ai[i]; 1530 nz = ai[i + 1] - ai[i]; 1531 s1 = tmp[i]; 1532 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1533 } 1534 1535 /* copy tmp into x according to permutation */ 1536 for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1537 1538 PetscCall(ISRestoreIndices(isrow, &rout)); 1539 PetscCall(ISRestoreIndices(iscol, &cout)); 1540 PetscCall(VecRestoreArrayRead(bb, &b)); 1541 PetscCall(VecRestoreArray(xx, &x)); 1542 1543 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1544 PetscFunctionReturn(0); 1545 } 1546 1547 /* ----------------------------------------------------------------*/ 1548 1549 /* 1550 ilu() under revised new data structure. 1551 Factored arrays bj and ba are stored as 1552 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1553 1554 bi=fact->i is an array of size n+1, in which 1555 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1556 bi[n]: points to L(n-1,n-1)+1 1557 1558 bdiag=fact->diag is an array of size n+1,in which 1559 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1560 bdiag[n]: points to entry of U(n-1,0)-1 1561 1562 U(i,:) contains bdiag[i] as its last entry, i.e., 1563 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1564 */ 1565 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) { 1566 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1567 const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag; 1568 PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag; 1569 IS isicol; 1570 1571 PetscFunctionBegin; 1572 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1573 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE)); 1574 b = (Mat_SeqAIJ *)(fact)->data; 1575 1576 /* allocate matrix arrays for new data structure */ 1577 PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i)); 1578 1579 b->singlemalloc = PETSC_TRUE; 1580 if (!b->diag) { PetscCall(PetscMalloc1(n + 1, &b->diag)); } 1581 bdiag = b->diag; 1582 1583 if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n])); 1584 1585 /* set bi and bj with new data structure */ 1586 bi = b->i; 1587 bj = b->j; 1588 1589 /* L part */ 1590 bi[0] = 0; 1591 for (i = 0; i < n; i++) { 1592 nz = adiag[i] - ai[i]; 1593 bi[i + 1] = bi[i] + nz; 1594 aj = a->j + ai[i]; 1595 for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1596 } 1597 1598 /* U part */ 1599 bdiag[n] = bi[n] - 1; 1600 for (i = n - 1; i >= 0; i--) { 1601 nz = ai[i + 1] - adiag[i] - 1; 1602 aj = a->j + adiag[i] + 1; 1603 for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1604 /* diag[i] */ 1605 bj[k++] = i; 1606 bdiag[i] = bdiag[i + 1] + nz + 1; 1607 } 1608 1609 fact->factortype = MAT_FACTOR_ILU; 1610 fact->info.factor_mallocs = 0; 1611 fact->info.fill_ratio_given = info->fill; 1612 fact->info.fill_ratio_needed = 1.0; 1613 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1614 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1615 1616 b = (Mat_SeqAIJ *)(fact)->data; 1617 b->row = isrow; 1618 b->col = iscol; 1619 b->icol = isicol; 1620 PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work)); 1621 PetscCall(PetscObjectReference((PetscObject)isrow)); 1622 PetscCall(PetscObjectReference((PetscObject)iscol)); 1623 PetscFunctionReturn(0); 1624 } 1625 1626 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) { 1627 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1628 IS isicol; 1629 const PetscInt *r, *ic; 1630 PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1631 PetscInt *bi, *cols, nnz, *cols_lvl; 1632 PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1633 PetscInt i, levels, diagonal_fill; 1634 PetscBool col_identity, row_identity, missing; 1635 PetscReal f; 1636 PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1637 PetscBT lnkbt; 1638 PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 1639 PetscFreeSpaceList free_space = NULL, current_space = NULL; 1640 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1641 1642 PetscFunctionBegin; 1643 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 1644 PetscCall(MatMissingDiagonal(A, &missing, &i)); 1645 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1646 1647 levels = (PetscInt)info->levels; 1648 PetscCall(ISIdentity(isrow, &row_identity)); 1649 PetscCall(ISIdentity(iscol, &col_identity)); 1650 if (!levels && row_identity && col_identity) { 1651 /* special case: ilu(0) with natural ordering */ 1652 PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info)); 1653 if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1654 PetscFunctionReturn(0); 1655 } 1656 1657 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1658 PetscCall(ISGetIndices(isrow, &r)); 1659 PetscCall(ISGetIndices(isicol, &ic)); 1660 1661 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1662 PetscCall(PetscMalloc1(n + 1, &bi)); 1663 PetscCall(PetscMalloc1(n + 1, &bdiag)); 1664 bi[0] = bdiag[0] = 0; 1665 PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1666 1667 /* create a linked list for storing column indices of the active row */ 1668 nlnk = n + 1; 1669 PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1670 1671 /* initial FreeSpace size is f*(ai[n]+1) */ 1672 f = info->fill; 1673 diagonal_fill = (PetscInt)info->diagonal_fill; 1674 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1675 current_space = free_space; 1676 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1677 current_space_lvl = free_space_lvl; 1678 for (i = 0; i < n; i++) { 1679 nzi = 0; 1680 /* copy current row into linked list */ 1681 nnz = ai[r[i] + 1] - ai[r[i]]; 1682 PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1683 cols = aj + ai[r[i]]; 1684 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1685 PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1686 nzi += nlnk; 1687 1688 /* make sure diagonal entry is included */ 1689 if (diagonal_fill && lnk[i] == -1) { 1690 fm = n; 1691 while (lnk[fm] < i) fm = lnk[fm]; 1692 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1693 lnk[fm] = i; 1694 lnk_lvl[i] = 0; 1695 nzi++; 1696 dcount++; 1697 } 1698 1699 /* add pivot rows into the active row */ 1700 nzbd = 0; 1701 prow = lnk[n]; 1702 while (prow < i) { 1703 nnz = bdiag[prow]; 1704 cols = bj_ptr[prow] + nnz + 1; 1705 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1706 nnz = bi[prow + 1] - bi[prow] - nnz - 1; 1707 PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1708 nzi += nlnk; 1709 prow = lnk[prow]; 1710 nzbd++; 1711 } 1712 bdiag[i] = nzbd; 1713 bi[i + 1] = bi[i] + nzi; 1714 /* if free space is not available, make more free space */ 1715 if (current_space->local_remaining < nzi) { 1716 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */ 1717 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 1718 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1719 reallocs++; 1720 } 1721 1722 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1723 PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 1724 bj_ptr[i] = current_space->array; 1725 bjlvl_ptr[i] = current_space_lvl->array; 1726 1727 /* make sure the active row i has diagonal entry */ 1728 PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 1729 1730 current_space->array += nzi; 1731 current_space->local_used += nzi; 1732 current_space->local_remaining -= nzi; 1733 current_space_lvl->array += nzi; 1734 current_space_lvl->local_used += nzi; 1735 current_space_lvl->local_remaining -= nzi; 1736 } 1737 1738 PetscCall(ISRestoreIndices(isrow, &r)); 1739 PetscCall(ISRestoreIndices(isicol, &ic)); 1740 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1741 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 1742 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 1743 1744 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 1745 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 1746 PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 1747 1748 #if defined(PETSC_USE_INFO) 1749 { 1750 PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1751 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 1752 PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 1753 PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 1754 PetscCall(PetscInfo(A, "for best performance.\n")); 1755 if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 1756 } 1757 #endif 1758 /* put together the new matrix */ 1759 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 1760 b = (Mat_SeqAIJ *)(fact)->data; 1761 1762 b->free_a = PETSC_TRUE; 1763 b->free_ij = PETSC_TRUE; 1764 b->singlemalloc = PETSC_FALSE; 1765 1766 PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a)); 1767 1768 b->j = bj; 1769 b->i = bi; 1770 b->diag = bdiag; 1771 b->ilen = NULL; 1772 b->imax = NULL; 1773 b->row = isrow; 1774 b->col = iscol; 1775 PetscCall(PetscObjectReference((PetscObject)isrow)); 1776 PetscCall(PetscObjectReference((PetscObject)iscol)); 1777 b->icol = isicol; 1778 1779 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 1780 /* In b structure: Free imax, ilen, old a, old j. 1781 Allocate bdiag, solve_work, new a, new j */ 1782 b->maxnz = b->nz = bdiag[0] + 1; 1783 1784 (fact)->info.factor_mallocs = reallocs; 1785 (fact)->info.fill_ratio_given = f; 1786 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1787 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1788 if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1789 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1790 PetscFunctionReturn(0); 1791 } 1792 1793 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) { 1794 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1795 IS isicol; 1796 const PetscInt *r, *ic; 1797 PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1798 PetscInt *bi, *cols, nnz, *cols_lvl; 1799 PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1800 PetscInt i, levels, diagonal_fill; 1801 PetscBool col_identity, row_identity; 1802 PetscReal f; 1803 PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1804 PetscBT lnkbt; 1805 PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 1806 PetscFreeSpaceList free_space = NULL, current_space = NULL; 1807 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1808 PetscBool missing; 1809 1810 PetscFunctionBegin; 1811 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 1812 PetscCall(MatMissingDiagonal(A, &missing, &i)); 1813 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1814 1815 f = info->fill; 1816 levels = (PetscInt)info->levels; 1817 diagonal_fill = (PetscInt)info->diagonal_fill; 1818 1819 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1820 1821 PetscCall(ISIdentity(isrow, &row_identity)); 1822 PetscCall(ISIdentity(iscol, &col_identity)); 1823 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1824 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE)); 1825 1826 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1827 if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1828 fact->factortype = MAT_FACTOR_ILU; 1829 (fact)->info.factor_mallocs = 0; 1830 (fact)->info.fill_ratio_given = info->fill; 1831 (fact)->info.fill_ratio_needed = 1.0; 1832 1833 b = (Mat_SeqAIJ *)(fact)->data; 1834 b->row = isrow; 1835 b->col = iscol; 1836 b->icol = isicol; 1837 PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work)); 1838 PetscCall(PetscObjectReference((PetscObject)isrow)); 1839 PetscCall(PetscObjectReference((PetscObject)iscol)); 1840 PetscFunctionReturn(0); 1841 } 1842 1843 PetscCall(ISGetIndices(isrow, &r)); 1844 PetscCall(ISGetIndices(isicol, &ic)); 1845 1846 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1847 PetscCall(PetscMalloc1(n + 1, &bi)); 1848 PetscCall(PetscMalloc1(n + 1, &bdiag)); 1849 bi[0] = bdiag[0] = 0; 1850 1851 PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1852 1853 /* create a linked list for storing column indices of the active row */ 1854 nlnk = n + 1; 1855 PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1856 1857 /* initial FreeSpace size is f*(ai[n]+1) */ 1858 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1859 current_space = free_space; 1860 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1861 current_space_lvl = free_space_lvl; 1862 1863 for (i = 0; i < n; i++) { 1864 nzi = 0; 1865 /* copy current row into linked list */ 1866 nnz = ai[r[i] + 1] - ai[r[i]]; 1867 PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1868 cols = aj + ai[r[i]]; 1869 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1870 PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1871 nzi += nlnk; 1872 1873 /* make sure diagonal entry is included */ 1874 if (diagonal_fill && lnk[i] == -1) { 1875 fm = n; 1876 while (lnk[fm] < i) fm = lnk[fm]; 1877 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1878 lnk[fm] = i; 1879 lnk_lvl[i] = 0; 1880 nzi++; 1881 dcount++; 1882 } 1883 1884 /* add pivot rows into the active row */ 1885 nzbd = 0; 1886 prow = lnk[n]; 1887 while (prow < i) { 1888 nnz = bdiag[prow]; 1889 cols = bj_ptr[prow] + nnz + 1; 1890 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1891 nnz = bi[prow + 1] - bi[prow] - nnz - 1; 1892 PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1893 nzi += nlnk; 1894 prow = lnk[prow]; 1895 nzbd++; 1896 } 1897 bdiag[i] = nzbd; 1898 bi[i + 1] = bi[i] + nzi; 1899 1900 /* if free space is not available, make more free space */ 1901 if (current_space->local_remaining < nzi) { 1902 nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */ 1903 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 1904 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1905 reallocs++; 1906 } 1907 1908 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1909 PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 1910 bj_ptr[i] = current_space->array; 1911 bjlvl_ptr[i] = current_space_lvl->array; 1912 1913 /* make sure the active row i has diagonal entry */ 1914 PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 1915 1916 current_space->array += nzi; 1917 current_space->local_used += nzi; 1918 current_space->local_remaining -= nzi; 1919 current_space_lvl->array += nzi; 1920 current_space_lvl->local_used += nzi; 1921 current_space_lvl->local_remaining -= nzi; 1922 } 1923 1924 PetscCall(ISRestoreIndices(isrow, &r)); 1925 PetscCall(ISRestoreIndices(isicol, &ic)); 1926 1927 /* destroy list of free space and other temporary arrays */ 1928 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 1929 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */ 1930 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 1931 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 1932 PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 1933 1934 #if defined(PETSC_USE_INFO) 1935 { 1936 PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 1937 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 1938 PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 1939 PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 1940 PetscCall(PetscInfo(A, "for best performance.\n")); 1941 if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 1942 } 1943 #endif 1944 1945 /* put together the new matrix */ 1946 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 1947 b = (Mat_SeqAIJ *)(fact)->data; 1948 1949 b->free_a = PETSC_TRUE; 1950 b->free_ij = PETSC_TRUE; 1951 b->singlemalloc = PETSC_FALSE; 1952 1953 PetscCall(PetscMalloc1(bi[n], &b->a)); 1954 b->j = bj; 1955 b->i = bi; 1956 for (i = 0; i < n; i++) bdiag[i] += bi[i]; 1957 b->diag = bdiag; 1958 b->ilen = NULL; 1959 b->imax = NULL; 1960 b->row = isrow; 1961 b->col = iscol; 1962 PetscCall(PetscObjectReference((PetscObject)isrow)); 1963 PetscCall(PetscObjectReference((PetscObject)iscol)); 1964 b->icol = isicol; 1965 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 1966 /* In b structure: Free imax, ilen, old a, old j. 1967 Allocate bdiag, solve_work, new a, new j */ 1968 b->maxnz = b->nz = bi[n]; 1969 1970 (fact)->info.factor_mallocs = reallocs; 1971 (fact)->info.fill_ratio_given = f; 1972 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 1973 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1974 if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1975 PetscFunctionReturn(0); 1976 } 1977 1978 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) { 1979 Mat C = B; 1980 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1981 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 1982 IS ip = b->row, iip = b->icol; 1983 const PetscInt *rip, *riip; 1984 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp; 1985 PetscInt *ai = a->i, *aj = a->j; 1986 PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz; 1987 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 1988 PetscBool perm_identity; 1989 FactorShiftCtx sctx; 1990 PetscReal rs; 1991 MatScalar d, *v; 1992 1993 PetscFunctionBegin; 1994 /* MatPivotSetUp(): initialize shift context sctx */ 1995 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1996 1997 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 1998 sctx.shift_top = info->zeropivot; 1999 for (i = 0; i < mbs; i++) { 2000 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2001 d = (aa)[a->diag[i]]; 2002 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2003 v = aa + ai[i]; 2004 nz = ai[i + 1] - ai[i]; 2005 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 2006 if (rs > sctx.shift_top) sctx.shift_top = rs; 2007 } 2008 sctx.shift_top *= 1.1; 2009 sctx.nshift_max = 5; 2010 sctx.shift_lo = 0.; 2011 sctx.shift_hi = 1.; 2012 } 2013 2014 PetscCall(ISGetIndices(ip, &rip)); 2015 PetscCall(ISGetIndices(iip, &riip)); 2016 2017 /* allocate working arrays 2018 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2019 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2020 */ 2021 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r)); 2022 2023 do { 2024 sctx.newshift = PETSC_FALSE; 2025 2026 for (i = 0; i < mbs; i++) c2r[i] = mbs; 2027 if (mbs) il[0] = 0; 2028 2029 for (k = 0; k < mbs; k++) { 2030 /* zero rtmp */ 2031 nz = bi[k + 1] - bi[k]; 2032 bjtmp = bj + bi[k]; 2033 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 2034 2035 /* load in initial unfactored row */ 2036 bval = ba + bi[k]; 2037 jmin = ai[rip[k]]; 2038 jmax = ai[rip[k] + 1]; 2039 for (j = jmin; j < jmax; j++) { 2040 col = riip[aj[j]]; 2041 if (col >= k) { /* only take upper triangular entry */ 2042 rtmp[col] = aa[j]; 2043 *bval++ = 0.0; /* for in-place factorization */ 2044 } 2045 } 2046 /* shift the diagonal of the matrix: ZeropivotApply() */ 2047 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2048 2049 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2050 dk = rtmp[k]; 2051 i = c2r[k]; /* first row to be added to k_th row */ 2052 2053 while (i < k) { 2054 nexti = c2r[i]; /* next row to be added to k_th row */ 2055 2056 /* compute multiplier, update diag(k) and U(i,k) */ 2057 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2058 uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */ 2059 dk += uikdi * ba[ili]; /* update diag[k] */ 2060 ba[ili] = uikdi; /* -U(i,k) */ 2061 2062 /* add multiple of row i to k-th row */ 2063 jmin = ili + 1; 2064 jmax = bi[i + 1]; 2065 if (jmin < jmax) { 2066 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 2067 /* update il and c2r for row i */ 2068 il[i] = jmin; 2069 j = bj[jmin]; 2070 c2r[i] = c2r[j]; 2071 c2r[j] = i; 2072 } 2073 i = nexti; 2074 } 2075 2076 /* copy data into U(k,:) */ 2077 rs = 0.0; 2078 jmin = bi[k]; 2079 jmax = bi[k + 1] - 1; 2080 if (jmin < jmax) { 2081 for (j = jmin; j < jmax; j++) { 2082 col = bj[j]; 2083 ba[j] = rtmp[col]; 2084 rs += PetscAbsScalar(ba[j]); 2085 } 2086 /* add the k-th row into il and c2r */ 2087 il[k] = jmin; 2088 i = bj[jmin]; 2089 c2r[k] = c2r[i]; 2090 c2r[i] = k; 2091 } 2092 2093 /* MatPivotCheck() */ 2094 sctx.rs = rs; 2095 sctx.pv = dk; 2096 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 2097 if (sctx.newshift) break; 2098 dk = sctx.pv; 2099 2100 ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */ 2101 } 2102 } while (sctx.newshift); 2103 2104 PetscCall(PetscFree3(rtmp, il, c2r)); 2105 PetscCall(ISRestoreIndices(ip, &rip)); 2106 PetscCall(ISRestoreIndices(iip, &riip)); 2107 2108 PetscCall(ISIdentity(ip, &perm_identity)); 2109 if (perm_identity) { 2110 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2111 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2112 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2113 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2114 } else { 2115 B->ops->solve = MatSolve_SeqSBAIJ_1; 2116 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2117 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2118 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2119 } 2120 2121 C->assembled = PETSC_TRUE; 2122 C->preallocated = PETSC_TRUE; 2123 2124 PetscCall(PetscLogFlops(C->rmap->n)); 2125 2126 /* MatPivotView() */ 2127 if (sctx.nshift) { 2128 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2129 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 2130 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2131 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2132 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2133 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 2134 } 2135 } 2136 PetscFunctionReturn(0); 2137 } 2138 2139 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) { 2140 Mat C = B; 2141 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2142 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 2143 IS ip = b->row, iip = b->icol; 2144 const PetscInt *rip, *riip; 2145 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp; 2146 PetscInt *ai = a->i, *aj = a->j; 2147 PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 2148 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 2149 PetscBool perm_identity; 2150 FactorShiftCtx sctx; 2151 PetscReal rs; 2152 MatScalar d, *v; 2153 2154 PetscFunctionBegin; 2155 /* MatPivotSetUp(): initialize shift context sctx */ 2156 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 2157 2158 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2159 sctx.shift_top = info->zeropivot; 2160 for (i = 0; i < mbs; i++) { 2161 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2162 d = (aa)[a->diag[i]]; 2163 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2164 v = aa + ai[i]; 2165 nz = ai[i + 1] - ai[i]; 2166 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 2167 if (rs > sctx.shift_top) sctx.shift_top = rs; 2168 } 2169 sctx.shift_top *= 1.1; 2170 sctx.nshift_max = 5; 2171 sctx.shift_lo = 0.; 2172 sctx.shift_hi = 1.; 2173 } 2174 2175 PetscCall(ISGetIndices(ip, &rip)); 2176 PetscCall(ISGetIndices(iip, &riip)); 2177 2178 /* initialization */ 2179 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 2180 2181 do { 2182 sctx.newshift = PETSC_FALSE; 2183 2184 for (i = 0; i < mbs; i++) jl[i] = mbs; 2185 il[0] = 0; 2186 2187 for (k = 0; k < mbs; k++) { 2188 /* zero rtmp */ 2189 nz = bi[k + 1] - bi[k]; 2190 bjtmp = bj + bi[k]; 2191 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 2192 2193 bval = ba + bi[k]; 2194 /* initialize k-th row by the perm[k]-th row of A */ 2195 jmin = ai[rip[k]]; 2196 jmax = ai[rip[k] + 1]; 2197 for (j = jmin; j < jmax; j++) { 2198 col = riip[aj[j]]; 2199 if (col >= k) { /* only take upper triangular entry */ 2200 rtmp[col] = aa[j]; 2201 *bval++ = 0.0; /* for in-place factorization */ 2202 } 2203 } 2204 /* shift the diagonal of the matrix */ 2205 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2206 2207 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2208 dk = rtmp[k]; 2209 i = jl[k]; /* first row to be added to k_th row */ 2210 2211 while (i < k) { 2212 nexti = jl[i]; /* next row to be added to k_th row */ 2213 2214 /* compute multiplier, update diag(k) and U(i,k) */ 2215 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2216 uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 2217 dk += uikdi * ba[ili]; 2218 ba[ili] = uikdi; /* -U(i,k) */ 2219 2220 /* add multiple of row i to k-th row */ 2221 jmin = ili + 1; 2222 jmax = bi[i + 1]; 2223 if (jmin < jmax) { 2224 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 2225 /* update il and jl for row i */ 2226 il[i] = jmin; 2227 j = bj[jmin]; 2228 jl[i] = jl[j]; 2229 jl[j] = i; 2230 } 2231 i = nexti; 2232 } 2233 2234 /* shift the diagonals when zero pivot is detected */ 2235 /* compute rs=sum of abs(off-diagonal) */ 2236 rs = 0.0; 2237 jmin = bi[k] + 1; 2238 nz = bi[k + 1] - jmin; 2239 bcol = bj + jmin; 2240 for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]); 2241 2242 sctx.rs = rs; 2243 sctx.pv = dk; 2244 PetscCall(MatPivotCheck(B, A, info, &sctx, k)); 2245 if (sctx.newshift) break; 2246 dk = sctx.pv; 2247 2248 /* copy data into U(k,:) */ 2249 ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 2250 jmin = bi[k] + 1; 2251 jmax = bi[k + 1]; 2252 if (jmin < jmax) { 2253 for (j = jmin; j < jmax; j++) { 2254 col = bj[j]; 2255 ba[j] = rtmp[col]; 2256 } 2257 /* add the k-th row into il and jl */ 2258 il[k] = jmin; 2259 i = bj[jmin]; 2260 jl[k] = jl[i]; 2261 jl[i] = k; 2262 } 2263 } 2264 } while (sctx.newshift); 2265 2266 PetscCall(PetscFree3(rtmp, il, jl)); 2267 PetscCall(ISRestoreIndices(ip, &rip)); 2268 PetscCall(ISRestoreIndices(iip, &riip)); 2269 2270 PetscCall(ISIdentity(ip, &perm_identity)); 2271 if (perm_identity) { 2272 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2273 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2274 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2275 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2276 } else { 2277 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2278 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2279 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2280 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2281 } 2282 2283 C->assembled = PETSC_TRUE; 2284 C->preallocated = PETSC_TRUE; 2285 2286 PetscCall(PetscLogFlops(C->rmap->n)); 2287 if (sctx.nshift) { 2288 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2289 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2290 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2291 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2292 } 2293 } 2294 PetscFunctionReturn(0); 2295 } 2296 2297 /* 2298 icc() under revised new data structure. 2299 Factored arrays bj and ba are stored as 2300 U(0,:),...,U(i,:),U(n-1,:) 2301 2302 ui=fact->i is an array of size n+1, in which 2303 ui+ 2304 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2305 ui[n]: points to U(n-1,n-1)+1 2306 2307 udiag=fact->diag is an array of size n,in which 2308 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2309 2310 U(i,:) contains udiag[i] as its last entry, i.e., 2311 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2312 */ 2313 2314 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) { 2315 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2316 Mat_SeqSBAIJ *b; 2317 PetscBool perm_identity, missing; 2318 PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 2319 const PetscInt *rip, *riip; 2320 PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 2321 PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 2322 PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2323 PetscReal fill = info->fill, levels = info->levels; 2324 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2325 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 2326 PetscBT lnkbt; 2327 IS iperm; 2328 2329 PetscFunctionBegin; 2330 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2331 PetscCall(MatMissingDiagonal(A, &missing, &d)); 2332 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 2333 PetscCall(ISIdentity(perm, &perm_identity)); 2334 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2335 2336 PetscCall(PetscMalloc1(am + 1, &ui)); 2337 PetscCall(PetscMalloc1(am + 1, &udiag)); 2338 ui[0] = 0; 2339 2340 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2341 if (!levels && perm_identity) { 2342 for (i = 0; i < am; i++) { 2343 ncols = ai[i + 1] - a->diag[i]; 2344 ui[i + 1] = ui[i] + ncols; 2345 udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */ 2346 } 2347 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2348 cols = uj; 2349 for (i = 0; i < am; i++) { 2350 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2351 ncols = ai[i + 1] - a->diag[i] - 1; 2352 for (j = 0; j < ncols; j++) *cols++ = aj[j]; 2353 *cols++ = i; /* diagonal is located as the last entry of U(i,:) */ 2354 } 2355 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2356 PetscCall(ISGetIndices(iperm, &riip)); 2357 PetscCall(ISGetIndices(perm, &rip)); 2358 2359 /* initialization */ 2360 PetscCall(PetscMalloc1(am + 1, &ajtmp)); 2361 2362 /* jl: linked list for storing indices of the pivot rows 2363 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2364 PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 2365 for (i = 0; i < am; i++) { 2366 jl[i] = am; 2367 il[i] = 0; 2368 } 2369 2370 /* create and initialize a linked list for storing column indices of the active row k */ 2371 nlnk = am + 1; 2372 PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 2373 2374 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2375 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 2376 current_space = free_space; 2377 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl)); 2378 current_space_lvl = free_space_lvl; 2379 2380 for (k = 0; k < am; k++) { /* for each active row k */ 2381 /* initialize lnk by the column indices of row rip[k] of A */ 2382 nzk = 0; 2383 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2384 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2385 ncols_upper = 0; 2386 for (j = 0; j < ncols; j++) { 2387 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2388 if (riip[i] >= k) { /* only take upper triangular entry */ 2389 ajtmp[ncols_upper] = i; 2390 ncols_upper++; 2391 } 2392 } 2393 PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 2394 nzk += nlnk; 2395 2396 /* update lnk by computing fill-in for each pivot row to be merged in */ 2397 prow = jl[k]; /* 1st pivot row */ 2398 2399 while (prow < k) { 2400 nextprow = jl[prow]; 2401 2402 /* merge prow into k-th row */ 2403 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2404 jmax = ui[prow + 1]; 2405 ncols = jmax - jmin; 2406 i = jmin - ui[prow]; 2407 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2408 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2409 j = *(uj - 1); 2410 PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 2411 nzk += nlnk; 2412 2413 /* update il and jl for prow */ 2414 if (jmin < jmax) { 2415 il[prow] = jmin; 2416 j = *cols; 2417 jl[prow] = jl[j]; 2418 jl[j] = prow; 2419 } 2420 prow = nextprow; 2421 } 2422 2423 /* if free space is not available, make more free space */ 2424 if (current_space->local_remaining < nzk) { 2425 i = am - k + 1; /* num of unfactored rows */ 2426 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2427 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2428 PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 2429 reallocs++; 2430 } 2431 2432 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2433 PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 2434 PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2435 2436 /* add the k-th row into il and jl */ 2437 if (nzk > 1) { 2438 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2439 jl[k] = jl[i]; 2440 jl[i] = k; 2441 il[k] = ui[k] + 1; 2442 } 2443 uj_ptr[k] = current_space->array; 2444 uj_lvl_ptr[k] = current_space_lvl->array; 2445 2446 current_space->array += nzk; 2447 current_space->local_used += nzk; 2448 current_space->local_remaining -= nzk; 2449 2450 current_space_lvl->array += nzk; 2451 current_space_lvl->local_used += nzk; 2452 current_space_lvl->local_remaining -= nzk; 2453 2454 ui[k + 1] = ui[k] + nzk; 2455 } 2456 2457 PetscCall(ISRestoreIndices(perm, &rip)); 2458 PetscCall(ISRestoreIndices(iperm, &riip)); 2459 PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 2460 PetscCall(PetscFree(ajtmp)); 2461 2462 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2463 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2464 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 2465 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2466 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2467 2468 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2469 2470 /* put together the new matrix in MATSEQSBAIJ format */ 2471 b = (Mat_SeqSBAIJ *)(fact)->data; 2472 b->singlemalloc = PETSC_FALSE; 2473 2474 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 2475 2476 b->j = uj; 2477 b->i = ui; 2478 b->diag = udiag; 2479 b->free_diag = PETSC_TRUE; 2480 b->ilen = NULL; 2481 b->imax = NULL; 2482 b->row = perm; 2483 b->col = perm; 2484 PetscCall(PetscObjectReference((PetscObject)perm)); 2485 PetscCall(PetscObjectReference((PetscObject)perm)); 2486 b->icol = iperm; 2487 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2488 2489 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2490 2491 b->maxnz = b->nz = ui[am]; 2492 b->free_a = PETSC_TRUE; 2493 b->free_ij = PETSC_TRUE; 2494 2495 fact->info.factor_mallocs = reallocs; 2496 fact->info.fill_ratio_given = fill; 2497 if (ai[am] != 0) { 2498 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2499 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 2500 } else { 2501 fact->info.fill_ratio_needed = 0.0; 2502 } 2503 #if defined(PETSC_USE_INFO) 2504 if (ai[am] != 0) { 2505 PetscReal af = fact->info.fill_ratio_needed; 2506 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2507 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2508 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2509 } else { 2510 PetscCall(PetscInfo(A, "Empty matrix\n")); 2511 } 2512 #endif 2513 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2514 PetscFunctionReturn(0); 2515 } 2516 2517 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) { 2518 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2519 Mat_SeqSBAIJ *b; 2520 PetscBool perm_identity, missing; 2521 PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 2522 const PetscInt *rip, *riip; 2523 PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 2524 PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 2525 PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2526 PetscReal fill = info->fill, levels = info->levels; 2527 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2528 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 2529 PetscBT lnkbt; 2530 IS iperm; 2531 2532 PetscFunctionBegin; 2533 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2534 PetscCall(MatMissingDiagonal(A, &missing, &d)); 2535 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 2536 PetscCall(ISIdentity(perm, &perm_identity)); 2537 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2538 2539 PetscCall(PetscMalloc1(am + 1, &ui)); 2540 PetscCall(PetscMalloc1(am + 1, &udiag)); 2541 ui[0] = 0; 2542 2543 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2544 if (!levels && perm_identity) { 2545 for (i = 0; i < am; i++) { 2546 ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i]; 2547 udiag[i] = ui[i]; 2548 } 2549 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2550 cols = uj; 2551 for (i = 0; i < am; i++) { 2552 aj = a->j + a->diag[i]; 2553 ncols = ui[i + 1] - ui[i]; 2554 for (j = 0; j < ncols; j++) *cols++ = *aj++; 2555 } 2556 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2557 PetscCall(ISGetIndices(iperm, &riip)); 2558 PetscCall(ISGetIndices(perm, &rip)); 2559 2560 /* initialization */ 2561 PetscCall(PetscMalloc1(am + 1, &ajtmp)); 2562 2563 /* jl: linked list for storing indices of the pivot rows 2564 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2565 PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 2566 for (i = 0; i < am; i++) { 2567 jl[i] = am; 2568 il[i] = 0; 2569 } 2570 2571 /* create and initialize a linked list for storing column indices of the active row k */ 2572 nlnk = am + 1; 2573 PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 2574 2575 /* initial FreeSpace size is fill*(ai[am]+1) */ 2576 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 2577 current_space = free_space; 2578 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl)); 2579 current_space_lvl = free_space_lvl; 2580 2581 for (k = 0; k < am; k++) { /* for each active row k */ 2582 /* initialize lnk by the column indices of row rip[k] of A */ 2583 nzk = 0; 2584 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2585 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2586 ncols_upper = 0; 2587 for (j = 0; j < ncols; j++) { 2588 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2589 if (riip[i] >= k) { /* only take upper triangular entry */ 2590 ajtmp[ncols_upper] = i; 2591 ncols_upper++; 2592 } 2593 } 2594 PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 2595 nzk += nlnk; 2596 2597 /* update lnk by computing fill-in for each pivot row to be merged in */ 2598 prow = jl[k]; /* 1st pivot row */ 2599 2600 while (prow < k) { 2601 nextprow = jl[prow]; 2602 2603 /* merge prow into k-th row */ 2604 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2605 jmax = ui[prow + 1]; 2606 ncols = jmax - jmin; 2607 i = jmin - ui[prow]; 2608 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2609 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2610 j = *(uj - 1); 2611 PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 2612 nzk += nlnk; 2613 2614 /* update il and jl for prow */ 2615 if (jmin < jmax) { 2616 il[prow] = jmin; 2617 j = *cols; 2618 jl[prow] = jl[j]; 2619 jl[j] = prow; 2620 } 2621 prow = nextprow; 2622 } 2623 2624 /* if free space is not available, make more free space */ 2625 if (current_space->local_remaining < nzk) { 2626 i = am - k + 1; /* num of unfactored rows */ 2627 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2628 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2629 PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 2630 reallocs++; 2631 } 2632 2633 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2634 PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 2635 PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2636 2637 /* add the k-th row into il and jl */ 2638 if (nzk > 1) { 2639 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2640 jl[k] = jl[i]; 2641 jl[i] = k; 2642 il[k] = ui[k] + 1; 2643 } 2644 uj_ptr[k] = current_space->array; 2645 uj_lvl_ptr[k] = current_space_lvl->array; 2646 2647 current_space->array += nzk; 2648 current_space->local_used += nzk; 2649 current_space->local_remaining -= nzk; 2650 2651 current_space_lvl->array += nzk; 2652 current_space_lvl->local_used += nzk; 2653 current_space_lvl->local_remaining -= nzk; 2654 2655 ui[k + 1] = ui[k] + nzk; 2656 } 2657 2658 #if defined(PETSC_USE_INFO) 2659 if (ai[am] != 0) { 2660 PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]); 2661 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2662 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2663 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2664 } else { 2665 PetscCall(PetscInfo(A, "Empty matrix\n")); 2666 } 2667 #endif 2668 2669 PetscCall(ISRestoreIndices(perm, &rip)); 2670 PetscCall(ISRestoreIndices(iperm, &riip)); 2671 PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 2672 PetscCall(PetscFree(ajtmp)); 2673 2674 /* destroy list of free space and other temporary array(s) */ 2675 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2676 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 2677 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2678 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2679 2680 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2681 2682 /* put together the new matrix in MATSEQSBAIJ format */ 2683 2684 b = (Mat_SeqSBAIJ *)fact->data; 2685 b->singlemalloc = PETSC_FALSE; 2686 2687 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 2688 2689 b->j = uj; 2690 b->i = ui; 2691 b->diag = udiag; 2692 b->free_diag = PETSC_TRUE; 2693 b->ilen = NULL; 2694 b->imax = NULL; 2695 b->row = perm; 2696 b->col = perm; 2697 2698 PetscCall(PetscObjectReference((PetscObject)perm)); 2699 PetscCall(PetscObjectReference((PetscObject)perm)); 2700 2701 b->icol = iperm; 2702 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2703 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2704 b->maxnz = b->nz = ui[am]; 2705 b->free_a = PETSC_TRUE; 2706 b->free_ij = PETSC_TRUE; 2707 2708 fact->info.factor_mallocs = reallocs; 2709 fact->info.fill_ratio_given = fill; 2710 if (ai[am] != 0) { 2711 fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]); 2712 } else { 2713 fact->info.fill_ratio_needed = 0.0; 2714 } 2715 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2716 PetscFunctionReturn(0); 2717 } 2718 2719 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) { 2720 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2721 Mat_SeqSBAIJ *b; 2722 PetscBool perm_identity, missing; 2723 PetscReal fill = info->fill; 2724 const PetscInt *rip, *riip; 2725 PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 2726 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 2727 PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag; 2728 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2729 PetscBT lnkbt; 2730 IS iperm; 2731 2732 PetscFunctionBegin; 2733 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2734 PetscCall(MatMissingDiagonal(A, &missing, &i)); 2735 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 2736 2737 /* check whether perm is the identity mapping */ 2738 PetscCall(ISIdentity(perm, &perm_identity)); 2739 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2740 PetscCall(ISGetIndices(iperm, &riip)); 2741 PetscCall(ISGetIndices(perm, &rip)); 2742 2743 /* initialization */ 2744 PetscCall(PetscMalloc1(am + 1, &ui)); 2745 PetscCall(PetscMalloc1(am + 1, &udiag)); 2746 ui[0] = 0; 2747 2748 /* jl: linked list for storing indices of the pivot rows 2749 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2750 PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 2751 for (i = 0; i < am; i++) { 2752 jl[i] = am; 2753 il[i] = 0; 2754 } 2755 2756 /* create and initialize a linked list for storing column indices of the active row k */ 2757 nlnk = am + 1; 2758 PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 2759 2760 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2761 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 2762 current_space = free_space; 2763 2764 for (k = 0; k < am; k++) { /* for each active row k */ 2765 /* initialize lnk by the column indices of row rip[k] of A */ 2766 nzk = 0; 2767 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2768 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2769 ncols_upper = 0; 2770 for (j = 0; j < ncols; j++) { 2771 i = riip[*(aj + ai[rip[k]] + j)]; 2772 if (i >= k) { /* only take upper triangular entry */ 2773 cols[ncols_upper] = i; 2774 ncols_upper++; 2775 } 2776 } 2777 PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 2778 nzk += nlnk; 2779 2780 /* update lnk by computing fill-in for each pivot row to be merged in */ 2781 prow = jl[k]; /* 1st pivot row */ 2782 2783 while (prow < k) { 2784 nextprow = jl[prow]; 2785 /* merge prow into k-th row */ 2786 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2787 jmax = ui[prow + 1]; 2788 ncols = jmax - jmin; 2789 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2790 PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 2791 nzk += nlnk; 2792 2793 /* update il and jl for prow */ 2794 if (jmin < jmax) { 2795 il[prow] = jmin; 2796 j = *uj_ptr; 2797 jl[prow] = jl[j]; 2798 jl[j] = prow; 2799 } 2800 prow = nextprow; 2801 } 2802 2803 /* if free space is not available, make more free space */ 2804 if (current_space->local_remaining < nzk) { 2805 i = am - k + 1; /* num of unfactored rows */ 2806 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2807 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2808 reallocs++; 2809 } 2810 2811 /* copy data into free space, then initialize lnk */ 2812 PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 2813 2814 /* add the k-th row into il and jl */ 2815 if (nzk > 1) { 2816 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2817 jl[k] = jl[i]; 2818 jl[i] = k; 2819 il[k] = ui[k] + 1; 2820 } 2821 ui_ptr[k] = current_space->array; 2822 2823 current_space->array += nzk; 2824 current_space->local_used += nzk; 2825 current_space->local_remaining -= nzk; 2826 2827 ui[k + 1] = ui[k] + nzk; 2828 } 2829 2830 PetscCall(ISRestoreIndices(perm, &rip)); 2831 PetscCall(ISRestoreIndices(iperm, &riip)); 2832 PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 2833 2834 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2835 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2836 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 2837 PetscCall(PetscLLDestroy(lnk, lnkbt)); 2838 2839 /* put together the new matrix in MATSEQSBAIJ format */ 2840 2841 b = (Mat_SeqSBAIJ *)fact->data; 2842 b->singlemalloc = PETSC_FALSE; 2843 b->free_a = PETSC_TRUE; 2844 b->free_ij = PETSC_TRUE; 2845 2846 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 2847 2848 b->j = uj; 2849 b->i = ui; 2850 b->diag = udiag; 2851 b->free_diag = PETSC_TRUE; 2852 b->ilen = NULL; 2853 b->imax = NULL; 2854 b->row = perm; 2855 b->col = perm; 2856 2857 PetscCall(PetscObjectReference((PetscObject)perm)); 2858 PetscCall(PetscObjectReference((PetscObject)perm)); 2859 2860 b->icol = iperm; 2861 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2862 2863 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2864 2865 b->maxnz = b->nz = ui[am]; 2866 2867 fact->info.factor_mallocs = reallocs; 2868 fact->info.fill_ratio_given = fill; 2869 if (ai[am] != 0) { 2870 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2871 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 2872 } else { 2873 fact->info.fill_ratio_needed = 0.0; 2874 } 2875 #if defined(PETSC_USE_INFO) 2876 if (ai[am] != 0) { 2877 PetscReal af = fact->info.fill_ratio_needed; 2878 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2879 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2880 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2881 } else { 2882 PetscCall(PetscInfo(A, "Empty matrix\n")); 2883 } 2884 #endif 2885 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2886 PetscFunctionReturn(0); 2887 } 2888 2889 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) { 2890 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2891 Mat_SeqSBAIJ *b; 2892 PetscBool perm_identity, missing; 2893 PetscReal fill = info->fill; 2894 const PetscInt *rip, *riip; 2895 PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 2896 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 2897 PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr; 2898 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2899 PetscBT lnkbt; 2900 IS iperm; 2901 2902 PetscFunctionBegin; 2903 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2904 PetscCall(MatMissingDiagonal(A, &missing, &i)); 2905 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 2906 2907 /* check whether perm is the identity mapping */ 2908 PetscCall(ISIdentity(perm, &perm_identity)); 2909 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2910 PetscCall(ISGetIndices(iperm, &riip)); 2911 PetscCall(ISGetIndices(perm, &rip)); 2912 2913 /* initialization */ 2914 PetscCall(PetscMalloc1(am + 1, &ui)); 2915 ui[0] = 0; 2916 2917 /* jl: linked list for storing indices of the pivot rows 2918 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2919 PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 2920 for (i = 0; i < am; i++) { 2921 jl[i] = am; 2922 il[i] = 0; 2923 } 2924 2925 /* create and initialize a linked list for storing column indices of the active row k */ 2926 nlnk = am + 1; 2927 PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 2928 2929 /* initial FreeSpace size is fill*(ai[am]+1) */ 2930 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 2931 current_space = free_space; 2932 2933 for (k = 0; k < am; k++) { /* for each active row k */ 2934 /* initialize lnk by the column indices of row rip[k] of A */ 2935 nzk = 0; 2936 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2937 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2938 ncols_upper = 0; 2939 for (j = 0; j < ncols; j++) { 2940 i = riip[*(aj + ai[rip[k]] + j)]; 2941 if (i >= k) { /* only take upper triangular entry */ 2942 cols[ncols_upper] = i; 2943 ncols_upper++; 2944 } 2945 } 2946 PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 2947 nzk += nlnk; 2948 2949 /* update lnk by computing fill-in for each pivot row to be merged in */ 2950 prow = jl[k]; /* 1st pivot row */ 2951 2952 while (prow < k) { 2953 nextprow = jl[prow]; 2954 /* merge prow into k-th row */ 2955 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2956 jmax = ui[prow + 1]; 2957 ncols = jmax - jmin; 2958 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2959 PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 2960 nzk += nlnk; 2961 2962 /* update il and jl for prow */ 2963 if (jmin < jmax) { 2964 il[prow] = jmin; 2965 j = *uj_ptr; 2966 jl[prow] = jl[j]; 2967 jl[j] = prow; 2968 } 2969 prow = nextprow; 2970 } 2971 2972 /* if free space is not available, make more free space */ 2973 if (current_space->local_remaining < nzk) { 2974 i = am - k + 1; /* num of unfactored rows */ 2975 i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2976 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2977 reallocs++; 2978 } 2979 2980 /* copy data into free space, then initialize lnk */ 2981 PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 2982 2983 /* add the k-th row into il and jl */ 2984 if (nzk - 1 > 0) { 2985 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2986 jl[k] = jl[i]; 2987 jl[i] = k; 2988 il[k] = ui[k] + 1; 2989 } 2990 ui_ptr[k] = current_space->array; 2991 2992 current_space->array += nzk; 2993 current_space->local_used += nzk; 2994 current_space->local_remaining -= nzk; 2995 2996 ui[k + 1] = ui[k] + nzk; 2997 } 2998 2999 #if defined(PETSC_USE_INFO) 3000 if (ai[am] != 0) { 3001 PetscReal af = (PetscReal)(ui[am]) / ((PetscReal)ai[am]); 3002 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 3003 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 3004 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 3005 } else { 3006 PetscCall(PetscInfo(A, "Empty matrix\n")); 3007 } 3008 #endif 3009 3010 PetscCall(ISRestoreIndices(perm, &rip)); 3011 PetscCall(ISRestoreIndices(iperm, &riip)); 3012 PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 3013 3014 /* destroy list of free space and other temporary array(s) */ 3015 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 3016 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 3017 PetscCall(PetscLLDestroy(lnk, lnkbt)); 3018 3019 /* put together the new matrix in MATSEQSBAIJ format */ 3020 3021 b = (Mat_SeqSBAIJ *)fact->data; 3022 b->singlemalloc = PETSC_FALSE; 3023 b->free_a = PETSC_TRUE; 3024 b->free_ij = PETSC_TRUE; 3025 3026 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 3027 3028 b->j = uj; 3029 b->i = ui; 3030 b->diag = NULL; 3031 b->ilen = NULL; 3032 b->imax = NULL; 3033 b->row = perm; 3034 b->col = perm; 3035 3036 PetscCall(PetscObjectReference((PetscObject)perm)); 3037 PetscCall(PetscObjectReference((PetscObject)perm)); 3038 3039 b->icol = iperm; 3040 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3041 3042 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 3043 b->maxnz = b->nz = ui[am]; 3044 3045 fact->info.factor_mallocs = reallocs; 3046 fact->info.fill_ratio_given = fill; 3047 if (ai[am] != 0) { 3048 fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]); 3049 } else { 3050 fact->info.fill_ratio_needed = 0.0; 3051 } 3052 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3053 PetscFunctionReturn(0); 3054 } 3055 3056 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx) { 3057 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3058 PetscInt n = A->rmap->n; 3059 const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi; 3060 PetscScalar *x, sum; 3061 const PetscScalar *b; 3062 const MatScalar *aa = a->a, *v; 3063 PetscInt i, nz; 3064 3065 PetscFunctionBegin; 3066 if (!n) PetscFunctionReturn(0); 3067 3068 PetscCall(VecGetArrayRead(bb, &b)); 3069 PetscCall(VecGetArrayWrite(xx, &x)); 3070 3071 /* forward solve the lower triangular */ 3072 x[0] = b[0]; 3073 v = aa; 3074 vi = aj; 3075 for (i = 1; i < n; i++) { 3076 nz = ai[i + 1] - ai[i]; 3077 sum = b[i]; 3078 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 3079 v += nz; 3080 vi += nz; 3081 x[i] = sum; 3082 } 3083 3084 /* backward solve the upper triangular */ 3085 for (i = n - 1; i >= 0; i--) { 3086 v = aa + adiag[i + 1] + 1; 3087 vi = aj + adiag[i + 1] + 1; 3088 nz = adiag[i] - adiag[i + 1] - 1; 3089 sum = x[i]; 3090 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 3091 x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3092 } 3093 3094 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 3095 PetscCall(VecRestoreArrayRead(bb, &b)); 3096 PetscCall(VecRestoreArrayWrite(xx, &x)); 3097 PetscFunctionReturn(0); 3098 } 3099 3100 PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx) { 3101 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3102 IS iscol = a->col, isrow = a->row; 3103 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz; 3104 const PetscInt *rout, *cout, *r, *c; 3105 PetscScalar *x, *tmp, sum; 3106 const PetscScalar *b; 3107 const MatScalar *aa = a->a, *v; 3108 3109 PetscFunctionBegin; 3110 if (!n) PetscFunctionReturn(0); 3111 3112 PetscCall(VecGetArrayRead(bb, &b)); 3113 PetscCall(VecGetArrayWrite(xx, &x)); 3114 tmp = a->solve_work; 3115 3116 PetscCall(ISGetIndices(isrow, &rout)); 3117 r = rout; 3118 PetscCall(ISGetIndices(iscol, &cout)); 3119 c = cout; 3120 3121 /* forward solve the lower triangular */ 3122 tmp[0] = b[r[0]]; 3123 v = aa; 3124 vi = aj; 3125 for (i = 1; i < n; i++) { 3126 nz = ai[i + 1] - ai[i]; 3127 sum = b[r[i]]; 3128 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3129 tmp[i] = sum; 3130 v += nz; 3131 vi += nz; 3132 } 3133 3134 /* backward solve the upper triangular */ 3135 for (i = n - 1; i >= 0; i--) { 3136 v = aa + adiag[i + 1] + 1; 3137 vi = aj + adiag[i + 1] + 1; 3138 nz = adiag[i] - adiag[i + 1] - 1; 3139 sum = tmp[i]; 3140 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3141 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 3142 } 3143 3144 PetscCall(ISRestoreIndices(isrow, &rout)); 3145 PetscCall(ISRestoreIndices(iscol, &cout)); 3146 PetscCall(VecRestoreArrayRead(bb, &b)); 3147 PetscCall(VecRestoreArrayWrite(xx, &x)); 3148 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 3149 PetscFunctionReturn(0); 3150 } 3151 3152 /* 3153 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3154 */ 3155 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact) { 3156 Mat B = *fact; 3157 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 3158 IS isicol; 3159 const PetscInt *r, *ic; 3160 PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag; 3161 PetscInt *bi, *bj, *bdiag, *bdiag_rev; 3162 PetscInt row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au; 3163 PetscInt nlnk, *lnk; 3164 PetscBT lnkbt; 3165 PetscBool row_identity, icol_identity; 3166 MatScalar *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp; 3167 const PetscInt *ics; 3168 PetscInt j, nz, *pj, *bjtmp, k, ncut, *jtmp; 3169 PetscReal dt = info->dt, shift = info->shiftamount; 3170 PetscInt dtcount = (PetscInt)info->dtcount, nnz_max; 3171 PetscBool missing; 3172 3173 PetscFunctionBegin; 3174 if (dt == PETSC_DEFAULT) dt = 0.005; 3175 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax); 3176 3177 /* ------- symbolic factorization, can be reused ---------*/ 3178 PetscCall(MatMissingDiagonal(A, &missing, &i)); 3179 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 3180 adiag = a->diag; 3181 3182 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 3183 3184 /* bdiag is location of diagonal in factor */ 3185 PetscCall(PetscMalloc1(n + 1, &bdiag)); /* becomes b->diag */ 3186 PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */ 3187 3188 /* allocate row pointers bi */ 3189 PetscCall(PetscMalloc1(2 * n + 2, &bi)); 3190 3191 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3192 if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */ 3193 nnz_max = ai[n] + 2 * n * dtcount + 2; 3194 3195 PetscCall(PetscMalloc1(nnz_max + 1, &bj)); 3196 PetscCall(PetscMalloc1(nnz_max + 1, &ba)); 3197 3198 /* put together the new matrix */ 3199 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 3200 b = (Mat_SeqAIJ *)B->data; 3201 3202 b->free_a = PETSC_TRUE; 3203 b->free_ij = PETSC_TRUE; 3204 b->singlemalloc = PETSC_FALSE; 3205 3206 b->a = ba; 3207 b->j = bj; 3208 b->i = bi; 3209 b->diag = bdiag; 3210 b->ilen = NULL; 3211 b->imax = NULL; 3212 b->row = isrow; 3213 b->col = iscol; 3214 PetscCall(PetscObjectReference((PetscObject)isrow)); 3215 PetscCall(PetscObjectReference((PetscObject)iscol)); 3216 b->icol = isicol; 3217 3218 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 3219 b->maxnz = nnz_max; 3220 3221 B->factortype = MAT_FACTOR_ILUDT; 3222 B->info.factor_mallocs = 0; 3223 B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]); 3224 /* ------- end of symbolic factorization ---------*/ 3225 3226 PetscCall(ISGetIndices(isrow, &r)); 3227 PetscCall(ISGetIndices(isicol, &ic)); 3228 ics = ic; 3229 3230 /* linked list for storing column indices of the active row */ 3231 nlnk = n + 1; 3232 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 3233 3234 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3235 PetscCall(PetscMalloc2(n, &im, n, &jtmp)); 3236 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3237 PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp)); 3238 PetscCall(PetscArrayzero(rtmp, n)); 3239 3240 bi[0] = 0; 3241 bdiag[0] = nnz_max - 1; /* location of diag[0] in factor B */ 3242 bdiag_rev[n] = bdiag[0]; 3243 bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */ 3244 for (i = 0; i < n; i++) { 3245 /* copy initial fill into linked list */ 3246 nzi = ai[r[i] + 1] - ai[r[i]]; 3247 PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 3248 nzi_al = adiag[r[i]] - ai[r[i]]; 3249 nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1; 3250 ajtmp = aj + ai[r[i]]; 3251 PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 3252 3253 /* load in initial (unfactored row) */ 3254 aatmp = a->a + ai[r[i]]; 3255 for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++; 3256 3257 /* add pivot rows into linked list */ 3258 row = lnk[n]; 3259 while (row < i) { 3260 nzi_bl = bi[row + 1] - bi[row] + 1; 3261 bjtmp = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */ 3262 PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im)); 3263 nzi += nlnk; 3264 row = lnk[row]; 3265 } 3266 3267 /* copy data from lnk into jtmp, then initialize lnk */ 3268 PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt)); 3269 3270 /* numerical factorization */ 3271 bjtmp = jtmp; 3272 row = *bjtmp++; /* 1st pivot row */ 3273 while (row < i) { 3274 pc = rtmp + row; 3275 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3276 multiplier = (*pc) * (*pv); 3277 *pc = multiplier; 3278 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3279 pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 3280 pv = ba + bdiag[row + 1] + 1; 3281 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3282 for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3283 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 3284 } 3285 row = *bjtmp++; 3286 } 3287 3288 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3289 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3290 nzi_bl = 0; 3291 j = 0; 3292 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3293 vtmp[j] = rtmp[jtmp[j]]; 3294 rtmp[jtmp[j]] = 0.0; 3295 nzi_bl++; 3296 j++; 3297 } 3298 nzi_bu = nzi - nzi_bl - 1; 3299 while (j < nzi) { 3300 vtmp[j] = rtmp[jtmp[j]]; 3301 rtmp[jtmp[j]] = 0.0; 3302 j++; 3303 } 3304 3305 bjtmp = bj + bi[i]; 3306 batmp = ba + bi[i]; 3307 /* apply level dropping rule to L part */ 3308 ncut = nzi_al + dtcount; 3309 if (ncut < nzi_bl) { 3310 PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp)); 3311 PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp)); 3312 } else { 3313 ncut = nzi_bl; 3314 } 3315 for (j = 0; j < ncut; j++) { 3316 bjtmp[j] = jtmp[j]; 3317 batmp[j] = vtmp[j]; 3318 } 3319 bi[i + 1] = bi[i] + ncut; 3320 nzi = ncut + 1; 3321 3322 /* apply level dropping rule to U part */ 3323 ncut = nzi_au + dtcount; 3324 if (ncut < nzi_bu) { 3325 PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1)); 3326 PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1)); 3327 } else { 3328 ncut = nzi_bu; 3329 } 3330 nzi += ncut; 3331 3332 /* mark bdiagonal */ 3333 bdiag[i + 1] = bdiag[i] - (ncut + 1); 3334 bdiag_rev[n - i - 1] = bdiag[i + 1]; 3335 bi[2 * n - i] = bi[2 * n - i + 1] - (ncut + 1); 3336 bjtmp = bj + bdiag[i]; 3337 batmp = ba + bdiag[i]; 3338 *bjtmp = i; 3339 *batmp = diag_tmp; /* rtmp[i]; */ 3340 if (*batmp == 0.0) *batmp = dt + shift; 3341 *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */ 3342 3343 bjtmp = bj + bdiag[i + 1] + 1; 3344 batmp = ba + bdiag[i + 1] + 1; 3345 for (k = 0; k < ncut; k++) { 3346 bjtmp[k] = jtmp[nzi_bl + 1 + k]; 3347 batmp[k] = vtmp[nzi_bl + 1 + k]; 3348 } 3349 3350 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3351 } /* for (i=0; i<n; i++) */ 3352 PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]); 3353 3354 PetscCall(ISRestoreIndices(isrow, &r)); 3355 PetscCall(ISRestoreIndices(isicol, &ic)); 3356 3357 PetscCall(PetscLLDestroy(lnk, lnkbt)); 3358 PetscCall(PetscFree2(im, jtmp)); 3359 PetscCall(PetscFree2(rtmp, vtmp)); 3360 PetscCall(PetscFree(bdiag_rev)); 3361 3362 PetscCall(PetscLogFlops(B->cmap->n)); 3363 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3364 3365 PetscCall(ISIdentity(isrow, &row_identity)); 3366 PetscCall(ISIdentity(isicol, &icol_identity)); 3367 if (row_identity && icol_identity) { 3368 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3369 } else { 3370 B->ops->solve = MatSolve_SeqAIJ; 3371 } 3372 3373 B->ops->solveadd = NULL; 3374 B->ops->solvetranspose = NULL; 3375 B->ops->solvetransposeadd = NULL; 3376 B->ops->matsolve = NULL; 3377 B->assembled = PETSC_TRUE; 3378 B->preallocated = PETSC_TRUE; 3379 PetscFunctionReturn(0); 3380 } 3381 3382 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3383 /* 3384 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3385 */ 3386 3387 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) { 3388 PetscFunctionBegin; 3389 PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact)); 3390 PetscFunctionReturn(0); 3391 } 3392 3393 /* 3394 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3395 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3396 */ 3397 /* 3398 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3399 */ 3400 3401 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info) { 3402 Mat C = fact; 3403 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 3404 IS isrow = b->row, isicol = b->icol; 3405 const PetscInt *r, *ic, *ics; 3406 PetscInt i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 3407 PetscInt *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj; 3408 MatScalar *rtmp, *pc, multiplier, *v, *pv, *aa = a->a; 3409 PetscReal dt = info->dt, shift = info->shiftamount; 3410 PetscBool row_identity, col_identity; 3411 3412 PetscFunctionBegin; 3413 PetscCall(ISGetIndices(isrow, &r)); 3414 PetscCall(ISGetIndices(isicol, &ic)); 3415 PetscCall(PetscMalloc1(n + 1, &rtmp)); 3416 ics = ic; 3417 3418 for (i = 0; i < n; i++) { 3419 /* initialize rtmp array */ 3420 nzl = bi[i + 1] - bi[i]; /* num of nozeros in L(i,:) */ 3421 bjtmp = bj + bi[i]; 3422 for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0; 3423 rtmp[i] = 0.0; 3424 nzu = bdiag[i] - bdiag[i + 1]; /* num of nozeros in U(i,:) */ 3425 bjtmp = bj + bdiag[i + 1] + 1; 3426 for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0; 3427 3428 /* load in initial unfactored row of A */ 3429 nz = ai[r[i] + 1] - ai[r[i]]; 3430 ajtmp = aj + ai[r[i]]; 3431 v = aa + ai[r[i]]; 3432 for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j]; 3433 3434 /* numerical factorization */ 3435 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3436 nzl = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */ 3437 k = 0; 3438 while (k < nzl) { 3439 row = *bjtmp++; 3440 pc = rtmp + row; 3441 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3442 multiplier = (*pc) * (*pv); 3443 *pc = multiplier; 3444 if (PetscAbsScalar(multiplier) > dt) { 3445 pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 3446 pv = b->a + bdiag[row + 1] + 1; 3447 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3448 for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3449 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 3450 } 3451 k++; 3452 } 3453 3454 /* finished row so stick it into b->a */ 3455 /* L-part */ 3456 pv = b->a + bi[i]; 3457 pj = bj + bi[i]; 3458 nzl = bi[i + 1] - bi[i]; 3459 for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]]; 3460 3461 /* diagonal: invert diagonal entries for simpler triangular solves */ 3462 if (rtmp[i] == 0.0) rtmp[i] = dt + shift; 3463 b->a[bdiag[i]] = 1.0 / rtmp[i]; 3464 3465 /* U-part */ 3466 pv = b->a + bdiag[i + 1] + 1; 3467 pj = bj + bdiag[i + 1] + 1; 3468 nzu = bdiag[i] - bdiag[i + 1] - 1; 3469 for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]]; 3470 } 3471 3472 PetscCall(PetscFree(rtmp)); 3473 PetscCall(ISRestoreIndices(isicol, &ic)); 3474 PetscCall(ISRestoreIndices(isrow, &r)); 3475 3476 PetscCall(ISIdentity(isrow, &row_identity)); 3477 PetscCall(ISIdentity(isicol, &col_identity)); 3478 if (row_identity && col_identity) { 3479 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3480 } else { 3481 C->ops->solve = MatSolve_SeqAIJ; 3482 } 3483 C->ops->solveadd = NULL; 3484 C->ops->solvetranspose = NULL; 3485 C->ops->solvetransposeadd = NULL; 3486 C->ops->matsolve = NULL; 3487 C->assembled = PETSC_TRUE; 3488 C->preallocated = PETSC_TRUE; 3489 3490 PetscCall(PetscLogFlops(C->cmap->n)); 3491 PetscFunctionReturn(0); 3492 } 3493