1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 /* 8 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 9 10 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 11 */ 12 #if 0 13 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol) 14 { 15 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->data; 16 PetscInt i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order; 17 const PetscInt *ai = a->i, *aj = a->j; 18 const PetscScalar *aa = a->a; 19 PetscBool *done; 20 PetscReal best, past = 0, future; 21 22 PetscFunctionBegin; 23 /* pick initial row */ 24 best = -1; 25 for (i = 0; i < n; i++) { 26 future = 0.0; 27 for (j = ai[i]; j < ai[i + 1]; j++) { 28 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 29 else past = PetscAbsScalar(aa[j]); 30 } 31 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 32 if (past / future > best) { 33 best = past / future; 34 current = i; 35 } 36 } 37 38 PetscCall(PetscMalloc1(n, &done)); 39 PetscCall(PetscArrayzero(done, n)); 40 PetscCall(PetscMalloc1(n, &order)); 41 order[0] = current; 42 for (i = 0; i < n - 1; i++) { 43 done[current] = PETSC_TRUE; 44 best = -1; 45 /* loop over all neighbors of current pivot */ 46 for (j = ai[current]; j < ai[current + 1]; j++) { 47 jj = aj[j]; 48 if (done[jj]) continue; 49 /* loop over columns of potential next row computing weights for below and above diagonal */ 50 past = future = 0.0; 51 for (k = ai[jj]; k < ai[jj + 1]; k++) { 52 kk = aj[k]; 53 if (done[kk]) past += PetscAbsScalar(aa[k]); 54 else if (kk != jj) future += PetscAbsScalar(aa[k]); 55 } 56 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 57 if (past / future > best) { 58 best = past / future; 59 newcurrent = jj; 60 } 61 } 62 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 63 best = -1; 64 for (k = 0; k < n; k++) { 65 if (done[k]) continue; 66 future = 0.0; 67 past = 0.0; 68 for (j = ai[k]; j < ai[k + 1]; j++) { 69 kk = aj[j]; 70 if (done[kk]) past += PetscAbsScalar(aa[j]); 71 else if (kk != k) future += PetscAbsScalar(aa[j]); 72 } 73 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 74 if (past / future > best) { 75 best = past / future; 76 newcurrent = k; 77 } 78 } 79 } 80 PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current"); 81 current = newcurrent; 82 order[i + 1] = current; 83 } 84 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow)); 85 *icol = *irow; 86 PetscCall(PetscObjectReference((PetscObject)*irow)); 87 PetscCall(PetscFree(done)); 88 PetscCall(PetscFree(order)); 89 PetscFunctionReturn(PETSC_SUCCESS); 90 } 91 #endif 92 93 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type) 94 { 95 PetscFunctionBegin; 96 *type = MATSOLVERPETSC; 97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B) 101 { 102 PetscInt n = A->rmap->n; 103 104 PetscFunctionBegin; 105 #if defined(PETSC_USE_COMPLEX) 106 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || (ftype != MAT_FACTOR_CHOLESKY && ftype != MAT_FACTOR_ICC), PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY or ICC Factor is not supported"); 107 #endif 108 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 109 PetscCall(MatSetSizes(*B, n, n, n, n)); 110 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 111 PetscCall(MatSetType(*B, MATSEQAIJ)); 112 113 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 114 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 115 116 PetscCall(MatSetBlockSizesFromMats(*B, A, A)); 117 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU])); 118 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU])); 119 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT])); 120 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 121 PetscCall(MatSetType(*B, MATSEQSBAIJ)); 122 PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL)); 123 124 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 125 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 126 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY])); 127 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC])); 128 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported"); 129 (*B)->factortype = ftype; 130 131 PetscCall(PetscFree((*B)->solvertype)); 132 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype)); 133 (*B)->canuseordering = PETSC_TRUE; 134 PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc)); 135 PetscFunctionReturn(PETSC_SUCCESS); 136 } 137 138 #if 0 139 // currently unused 140 static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 141 { 142 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 143 IS isicol; 144 const PetscInt *r, *ic; 145 PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j; 146 PetscInt *bi, *bj, *ajtmp; 147 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 148 PetscReal f; 149 PetscInt nlnk, *lnk, k, **bi_ptr; 150 PetscFreeSpaceList free_space = NULL, current_space = NULL; 151 PetscBT lnkbt; 152 PetscBool missing; 153 154 PetscFunctionBegin; 155 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 156 PetscCall(MatMissingDiagonal(A, &missing, &i)); 157 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 158 159 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 160 PetscCall(ISGetIndices(isrow, &r)); 161 PetscCall(ISGetIndices(isicol, &ic)); 162 163 /* get new row pointers */ 164 PetscCall(PetscMalloc1(n + 1, &bi)); 165 bi[0] = 0; 166 167 /* bdiag is location of diagonal in factor */ 168 PetscCall(PetscMalloc1(n + 1, &bdiag)); 169 bdiag[0] = 0; 170 171 /* linked list for storing column indices of the active row */ 172 nlnk = n + 1; 173 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 174 175 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 176 177 /* initial FreeSpace size is f*(ai[n]+1) */ 178 f = info->fill; 179 if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 180 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 181 current_space = free_space; 182 183 for (i = 0; i < n; i++) { 184 /* copy previous fill into linked list */ 185 nzi = 0; 186 nnz = ai[r[i] + 1] - ai[r[i]]; 187 ajtmp = aj + ai[r[i]]; 188 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 189 nzi += nlnk; 190 191 /* add pivot rows into linked list */ 192 row = lnk[n]; 193 while (row < i) { 194 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 195 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 196 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 197 nzi += nlnk; 198 row = lnk[row]; 199 } 200 bi[i + 1] = bi[i] + nzi; 201 im[i] = nzi; 202 203 /* mark bdiag */ 204 nzbd = 0; 205 nnz = nzi; 206 k = lnk[n]; 207 while (nnz-- && k < i) { 208 nzbd++; 209 k = lnk[k]; 210 } 211 bdiag[i] = bi[i] + nzbd; 212 213 /* if free space is not available, make more free space */ 214 if (current_space->local_remaining < nzi) { 215 nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */ 216 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 217 reallocs++; 218 } 219 220 /* copy data into free space, then initialize lnk */ 221 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 222 223 bi_ptr[i] = current_space->array; 224 current_space->array += nzi; 225 current_space->local_used += nzi; 226 current_space->local_remaining -= nzi; 227 } 228 #if defined(PETSC_USE_INFO) 229 if (ai[n] != 0) { 230 PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 231 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 232 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 233 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 234 PetscCall(PetscInfo(A, "for best performance.\n")); 235 } else { 236 PetscCall(PetscInfo(A, "Empty matrix\n")); 237 } 238 #endif 239 240 PetscCall(ISRestoreIndices(isrow, &r)); 241 PetscCall(ISRestoreIndices(isicol, &ic)); 242 243 /* destroy list of free space and other temporary array(s) */ 244 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 245 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 246 PetscCall(PetscLLDestroy(lnk, lnkbt)); 247 PetscCall(PetscFree2(bi_ptr, im)); 248 249 /* put together the new matrix */ 250 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 251 b = (Mat_SeqAIJ *)(B)->data; 252 253 b->free_a = PETSC_TRUE; 254 b->free_ij = PETSC_TRUE; 255 b->singlemalloc = PETSC_FALSE; 256 257 PetscCall(PetscMalloc1(bi[n] + 1, &b->a)); 258 b->j = bj; 259 b->i = bi; 260 b->diag = bdiag; 261 b->ilen = NULL; 262 b->imax = NULL; 263 b->row = isrow; 264 b->col = iscol; 265 PetscCall(PetscObjectReference((PetscObject)isrow)); 266 PetscCall(PetscObjectReference((PetscObject)iscol)); 267 b->icol = isicol; 268 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 269 270 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 271 b->maxnz = b->nz = bi[n]; 272 273 (B)->factortype = MAT_FACTOR_LU; 274 (B)->info.factor_mallocs = reallocs; 275 (B)->info.fill_ratio_given = f; 276 277 if (ai[n]) { 278 (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 279 } else { 280 (B)->info.fill_ratio_needed = 0.0; 281 } 282 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 283 if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 284 PetscFunctionReturn(PETSC_SUCCESS); 285 } 286 #endif 287 288 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 289 { 290 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 291 IS isicol; 292 const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp; 293 PetscInt i, n = A->rmap->n; 294 PetscInt *bi, *bj; 295 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 296 PetscReal f; 297 PetscInt nlnk, *lnk, k, **bi_ptr; 298 PetscFreeSpaceList free_space = NULL, current_space = NULL; 299 PetscBT lnkbt; 300 PetscBool missing; 301 302 PetscFunctionBegin; 303 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 304 PetscCall(MatMissingDiagonal(A, &missing, &i)); 305 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 306 307 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 308 PetscCall(ISGetIndices(isrow, &r)); 309 PetscCall(ISGetIndices(isicol, &ic)); 310 311 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 312 PetscCall(PetscMalloc1(n + 1, &bi)); 313 PetscCall(PetscMalloc1(n + 1, &bdiag)); 314 bi[0] = bdiag[0] = 0; 315 316 /* linked list for storing column indices of the active row */ 317 nlnk = n + 1; 318 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 319 320 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 321 322 /* initial FreeSpace size is f*(ai[n]+1) */ 323 f = info->fill; 324 if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 325 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 326 current_space = free_space; 327 328 for (i = 0; i < n; i++) { 329 /* copy previous fill into linked list */ 330 nzi = 0; 331 nnz = ai[r[i] + 1] - ai[r[i]]; 332 ajtmp = aj + ai[r[i]]; 333 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 334 nzi += nlnk; 335 336 /* add pivot rows into linked list */ 337 row = lnk[n]; 338 while (row < i) { 339 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 340 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 341 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 342 nzi += nlnk; 343 row = lnk[row]; 344 } 345 bi[i + 1] = bi[i] + nzi; 346 im[i] = nzi; 347 348 /* mark bdiag */ 349 nzbd = 0; 350 nnz = nzi; 351 k = lnk[n]; 352 while (nnz-- && k < i) { 353 nzbd++; 354 k = lnk[k]; 355 } 356 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 357 358 /* if free space is not available, make more free space */ 359 if (current_space->local_remaining < nzi) { 360 /* estimated additional space needed */ 361 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi)); 362 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 363 reallocs++; 364 } 365 366 /* copy data into free space, then initialize lnk */ 367 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 368 369 bi_ptr[i] = current_space->array; 370 current_space->array += nzi; 371 current_space->local_used += nzi; 372 current_space->local_remaining -= nzi; 373 } 374 375 PetscCall(ISRestoreIndices(isrow, &r)); 376 PetscCall(ISRestoreIndices(isicol, &ic)); 377 378 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 379 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 380 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 381 PetscCall(PetscLLDestroy(lnk, lnkbt)); 382 PetscCall(PetscFree2(bi_ptr, im)); 383 384 /* put together the new matrix */ 385 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 386 b = (Mat_SeqAIJ *)(B)->data; 387 388 b->free_a = PETSC_TRUE; 389 b->free_ij = PETSC_TRUE; 390 b->singlemalloc = PETSC_FALSE; 391 392 PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a)); 393 394 b->j = bj; 395 b->i = bi; 396 b->diag = bdiag; 397 b->ilen = NULL; 398 b->imax = NULL; 399 b->row = isrow; 400 b->col = iscol; 401 PetscCall(PetscObjectReference((PetscObject)isrow)); 402 PetscCall(PetscObjectReference((PetscObject)iscol)); 403 b->icol = isicol; 404 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 405 406 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 407 b->maxnz = b->nz = bdiag[0] + 1; 408 409 B->factortype = MAT_FACTOR_LU; 410 B->info.factor_mallocs = reallocs; 411 B->info.fill_ratio_given = f; 412 413 if (ai[n]) { 414 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 415 } else { 416 B->info.fill_ratio_needed = 0.0; 417 } 418 #if defined(PETSC_USE_INFO) 419 if (ai[n] != 0) { 420 PetscReal af = B->info.fill_ratio_needed; 421 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 422 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 423 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 424 PetscCall(PetscInfo(A, "for best performance.\n")); 425 } else { 426 PetscCall(PetscInfo(A, "Empty matrix\n")); 427 } 428 #endif 429 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 430 if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 431 PetscCall(MatSeqAIJCheckInode_FactorLU(B)); 432 PetscFunctionReturn(PETSC_SUCCESS); 433 } 434 435 /* 436 Trouble in factorization, should we dump the original matrix? 437 */ 438 PetscErrorCode MatFactorDumpMatrix(Mat A) 439 { 440 PetscBool flg = PETSC_FALSE; 441 442 PetscFunctionBegin; 443 PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL)); 444 if (flg) { 445 PetscViewer viewer; 446 char filename[PETSC_MAX_PATH_LEN]; 447 448 PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank)); 449 PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer)); 450 PetscCall(MatView(A, viewer)); 451 PetscCall(PetscViewerDestroy(&viewer)); 452 } 453 PetscFunctionReturn(PETSC_SUCCESS); 454 } 455 456 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 457 { 458 Mat C = B; 459 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 460 IS isrow = b->row, isicol = b->icol; 461 const PetscInt *r, *ic, *ics; 462 const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag; 463 PetscInt i, j, k, nz, nzL, row, *pj; 464 const PetscInt *ajtmp, *bjtmp; 465 MatScalar *rtmp, *pc, multiplier, *pv; 466 const MatScalar *aa = a->a, *v; 467 PetscBool row_identity, col_identity; 468 FactorShiftCtx sctx; 469 const PetscInt *ddiag; 470 PetscReal rs; 471 MatScalar d; 472 473 PetscFunctionBegin; 474 /* MatPivotSetUp(): initialize shift context sctx */ 475 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 476 477 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 478 ddiag = a->diag; 479 sctx.shift_top = info->zeropivot; 480 for (i = 0; i < n; i++) { 481 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 482 d = (aa)[ddiag[i]]; 483 rs = -PetscAbsScalar(d) - PetscRealPart(d); 484 v = aa + ai[i]; 485 nz = ai[i + 1] - ai[i]; 486 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 487 if (rs > sctx.shift_top) sctx.shift_top = rs; 488 } 489 sctx.shift_top *= 1.1; 490 sctx.nshift_max = 5; 491 sctx.shift_lo = 0.; 492 sctx.shift_hi = 1.; 493 } 494 495 PetscCall(ISGetIndices(isrow, &r)); 496 PetscCall(ISGetIndices(isicol, &ic)); 497 PetscCall(PetscMalloc1(n + 1, &rtmp)); 498 ics = ic; 499 500 do { 501 sctx.newshift = PETSC_FALSE; 502 for (i = 0; i < n; i++) { 503 /* zero rtmp */ 504 /* L part */ 505 nz = bi[i + 1] - bi[i]; 506 bjtmp = bj + bi[i]; 507 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 508 509 /* U part */ 510 nz = bdiag[i] - bdiag[i + 1]; 511 bjtmp = bj + bdiag[i + 1] + 1; 512 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 513 514 /* load in initial (unfactored row) */ 515 nz = ai[r[i] + 1] - ai[r[i]]; 516 ajtmp = aj + ai[r[i]]; 517 v = aa + ai[r[i]]; 518 for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 519 /* ZeropivotApply() */ 520 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 521 522 /* elimination */ 523 bjtmp = bj + bi[i]; 524 row = *bjtmp++; 525 nzL = bi[i + 1] - bi[i]; 526 for (k = 0; k < nzL; k++) { 527 pc = rtmp + row; 528 if (*pc != 0.0) { 529 pv = b->a + bdiag[row]; 530 multiplier = *pc * (*pv); 531 *pc = multiplier; 532 533 pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 534 pv = b->a + bdiag[row + 1] + 1; 535 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */ 536 537 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 538 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 539 } 540 row = *bjtmp++; 541 } 542 543 /* finished row so stick it into b->a */ 544 rs = 0.0; 545 /* L part */ 546 pv = b->a + bi[i]; 547 pj = b->j + bi[i]; 548 nz = bi[i + 1] - bi[i]; 549 for (j = 0; j < nz; j++) { 550 pv[j] = rtmp[pj[j]]; 551 rs += PetscAbsScalar(pv[j]); 552 } 553 554 /* U part */ 555 pv = b->a + bdiag[i + 1] + 1; 556 pj = b->j + bdiag[i + 1] + 1; 557 nz = bdiag[i] - bdiag[i + 1] - 1; 558 for (j = 0; j < nz; j++) { 559 pv[j] = rtmp[pj[j]]; 560 rs += PetscAbsScalar(pv[j]); 561 } 562 563 sctx.rs = rs; 564 sctx.pv = rtmp[i]; 565 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 566 if (sctx.newshift) break; /* break for-loop */ 567 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 568 569 /* Mark diagonal and invert diagonal for simpler triangular solves */ 570 pv = b->a + bdiag[i]; 571 *pv = 1.0 / rtmp[i]; 572 573 } /* endof for (i=0; i<n; i++) { */ 574 575 /* MatPivotRefine() */ 576 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 577 /* 578 * if no shift in this attempt & shifting & started shifting & can refine, 579 * then try lower shift 580 */ 581 sctx.shift_hi = sctx.shift_fraction; 582 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 583 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 584 sctx.newshift = PETSC_TRUE; 585 sctx.nshift++; 586 } 587 } while (sctx.newshift); 588 589 PetscCall(PetscFree(rtmp)); 590 PetscCall(ISRestoreIndices(isicol, &ic)); 591 PetscCall(ISRestoreIndices(isrow, &r)); 592 593 PetscCall(ISIdentity(isrow, &row_identity)); 594 PetscCall(ISIdentity(isicol, &col_identity)); 595 if (b->inode.size) { 596 C->ops->solve = MatSolve_SeqAIJ_Inode; 597 } else if (row_identity && col_identity) { 598 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 599 } else { 600 C->ops->solve = MatSolve_SeqAIJ; 601 } 602 C->ops->solveadd = MatSolveAdd_SeqAIJ; 603 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 604 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 605 C->ops->matsolve = MatMatSolve_SeqAIJ; 606 C->assembled = PETSC_TRUE; 607 C->preallocated = PETSC_TRUE; 608 609 PetscCall(PetscLogFlops(C->cmap->n)); 610 611 /* MatShiftView(A,info,&sctx) */ 612 if (sctx.nshift) { 613 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 614 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 615 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 616 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 617 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 618 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 619 } 620 } 621 PetscFunctionReturn(PETSC_SUCCESS); 622 } 623 624 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat); 625 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec); 626 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec); 627 628 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 629 { 630 Mat C = B; 631 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 632 IS isrow = b->row, isicol = b->icol; 633 const PetscInt *r, *ic, *ics; 634 PetscInt nz, row, i, j, n = A->rmap->n, diag; 635 const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 636 const PetscInt *ajtmp, *bjtmp, *diag_offset = b->diag, *pj; 637 MatScalar *pv, *rtmp, *pc, multiplier, d; 638 const MatScalar *v, *aa = a->a; 639 PetscReal rs = 0.0; 640 FactorShiftCtx sctx; 641 const PetscInt *ddiag; 642 PetscBool row_identity, col_identity; 643 644 PetscFunctionBegin; 645 /* MatPivotSetUp(): initialize shift context sctx */ 646 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 647 648 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 649 ddiag = a->diag; 650 sctx.shift_top = info->zeropivot; 651 for (i = 0; i < n; i++) { 652 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 653 d = (aa)[ddiag[i]]; 654 rs = -PetscAbsScalar(d) - PetscRealPart(d); 655 v = aa + ai[i]; 656 nz = ai[i + 1] - ai[i]; 657 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 658 if (rs > sctx.shift_top) sctx.shift_top = rs; 659 } 660 sctx.shift_top *= 1.1; 661 sctx.nshift_max = 5; 662 sctx.shift_lo = 0.; 663 sctx.shift_hi = 1.; 664 } 665 666 PetscCall(ISGetIndices(isrow, &r)); 667 PetscCall(ISGetIndices(isicol, &ic)); 668 PetscCall(PetscMalloc1(n + 1, &rtmp)); 669 ics = ic; 670 671 do { 672 sctx.newshift = PETSC_FALSE; 673 for (i = 0; i < n; i++) { 674 nz = bi[i + 1] - bi[i]; 675 bjtmp = bj + bi[i]; 676 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 677 678 /* load in initial (unfactored row) */ 679 nz = ai[r[i] + 1] - ai[r[i]]; 680 ajtmp = aj + ai[r[i]]; 681 v = aa + ai[r[i]]; 682 for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 683 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 684 685 row = *bjtmp++; 686 while (row < i) { 687 pc = rtmp + row; 688 if (*pc != 0.0) { 689 pv = b->a + diag_offset[row]; 690 pj = b->j + diag_offset[row] + 1; 691 multiplier = *pc / *pv++; 692 *pc = multiplier; 693 nz = bi[row + 1] - diag_offset[row] - 1; 694 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 695 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 696 } 697 row = *bjtmp++; 698 } 699 /* finished row so stick it into b->a */ 700 pv = b->a + bi[i]; 701 pj = b->j + bi[i]; 702 nz = bi[i + 1] - bi[i]; 703 diag = diag_offset[i] - bi[i]; 704 rs = 0.0; 705 for (j = 0; j < nz; j++) { 706 pv[j] = rtmp[pj[j]]; 707 rs += PetscAbsScalar(pv[j]); 708 } 709 rs -= PetscAbsScalar(pv[diag]); 710 711 sctx.rs = rs; 712 sctx.pv = pv[diag]; 713 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 714 if (sctx.newshift) break; 715 pv[diag] = sctx.pv; 716 } 717 718 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 719 /* 720 * if no shift in this attempt & shifting & started shifting & can refine, 721 * then try lower shift 722 */ 723 sctx.shift_hi = sctx.shift_fraction; 724 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 725 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 726 sctx.newshift = PETSC_TRUE; 727 sctx.nshift++; 728 } 729 } while (sctx.newshift); 730 731 /* invert diagonal entries for simpler triangular solves */ 732 for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]]; 733 PetscCall(PetscFree(rtmp)); 734 PetscCall(ISRestoreIndices(isicol, &ic)); 735 PetscCall(ISRestoreIndices(isrow, &r)); 736 737 PetscCall(ISIdentity(isrow, &row_identity)); 738 PetscCall(ISIdentity(isicol, &col_identity)); 739 if (row_identity && col_identity) { 740 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 741 } else { 742 C->ops->solve = MatSolve_SeqAIJ_inplace; 743 } 744 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 745 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 746 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 747 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 748 749 C->assembled = PETSC_TRUE; 750 C->preallocated = PETSC_TRUE; 751 752 PetscCall(PetscLogFlops(C->cmap->n)); 753 if (sctx.nshift) { 754 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 755 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 756 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 757 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 758 } 759 } 760 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 761 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 762 763 PetscCall(MatSeqAIJCheckInode(C)); 764 PetscFunctionReturn(PETSC_SUCCESS); 765 } 766 767 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec); 768 769 /* 770 This routine implements inplace ILU(0) with row or/and column permutations. 771 Input: 772 A - original matrix 773 Output; 774 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 775 a->j (col index) is permuted by the inverse of colperm, then sorted 776 a->a reordered accordingly with a->j 777 a->diag (ptr to diagonal elements) is updated. 778 */ 779 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info) 780 { 781 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 782 IS isrow = a->row, isicol = a->icol; 783 const PetscInt *r, *ic, *ics; 784 PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j; 785 PetscInt *ajtmp, nz, row; 786 PetscInt *diag = a->diag, nbdiag, *pj; 787 PetscScalar *rtmp, *pc, multiplier, d; 788 MatScalar *pv, *v; 789 PetscReal rs; 790 FactorShiftCtx sctx; 791 const MatScalar *aa = a->a, *vtmp; 792 793 PetscFunctionBegin; 794 PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address"); 795 796 /* MatPivotSetUp(): initialize shift context sctx */ 797 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 798 799 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 800 const PetscInt *ddiag = a->diag; 801 sctx.shift_top = info->zeropivot; 802 for (i = 0; i < n; i++) { 803 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 804 d = (aa)[ddiag[i]]; 805 rs = -PetscAbsScalar(d) - PetscRealPart(d); 806 vtmp = aa + ai[i]; 807 nz = ai[i + 1] - ai[i]; 808 for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]); 809 if (rs > sctx.shift_top) sctx.shift_top = rs; 810 } 811 sctx.shift_top *= 1.1; 812 sctx.nshift_max = 5; 813 sctx.shift_lo = 0.; 814 sctx.shift_hi = 1.; 815 } 816 817 PetscCall(ISGetIndices(isrow, &r)); 818 PetscCall(ISGetIndices(isicol, &ic)); 819 PetscCall(PetscMalloc1(n + 1, &rtmp)); 820 PetscCall(PetscArrayzero(rtmp, n + 1)); 821 ics = ic; 822 823 #if defined(MV) 824 sctx.shift_top = 0.; 825 sctx.nshift_max = 0; 826 sctx.shift_lo = 0.; 827 sctx.shift_hi = 0.; 828 sctx.shift_fraction = 0.; 829 830 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 831 sctx.shift_top = 0.; 832 for (i = 0; i < n; i++) { 833 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 834 d = (a->a)[diag[i]]; 835 rs = -PetscAbsScalar(d) - PetscRealPart(d); 836 v = a->a + ai[i]; 837 nz = ai[i + 1] - ai[i]; 838 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 839 if (rs > sctx.shift_top) sctx.shift_top = rs; 840 } 841 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 842 sctx.shift_top *= 1.1; 843 sctx.nshift_max = 5; 844 sctx.shift_lo = 0.; 845 sctx.shift_hi = 1.; 846 } 847 848 sctx.shift_amount = 0.; 849 sctx.nshift = 0; 850 #endif 851 852 do { 853 sctx.newshift = PETSC_FALSE; 854 for (i = 0; i < n; i++) { 855 /* load in initial unfactored row */ 856 nz = ai[r[i] + 1] - ai[r[i]]; 857 ajtmp = aj + ai[r[i]]; 858 v = a->a + ai[r[i]]; 859 /* sort permuted ajtmp and values v accordingly */ 860 for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]]; 861 PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v)); 862 863 diag[r[i]] = ai[r[i]]; 864 for (j = 0; j < nz; j++) { 865 rtmp[ajtmp[j]] = v[j]; 866 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 867 } 868 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 869 870 row = *ajtmp++; 871 while (row < i) { 872 pc = rtmp + row; 873 if (*pc != 0.0) { 874 pv = a->a + diag[r[row]]; 875 pj = aj + diag[r[row]] + 1; 876 877 multiplier = *pc / *pv++; 878 *pc = multiplier; 879 nz = ai[r[row] + 1] - diag[r[row]] - 1; 880 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 881 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 882 } 883 row = *ajtmp++; 884 } 885 /* finished row so overwrite it onto a->a */ 886 pv = a->a + ai[r[i]]; 887 pj = aj + ai[r[i]]; 888 nz = ai[r[i] + 1] - ai[r[i]]; 889 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 890 891 rs = 0.0; 892 for (j = 0; j < nz; j++) { 893 pv[j] = rtmp[pj[j]]; 894 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 895 } 896 897 sctx.rs = rs; 898 sctx.pv = pv[nbdiag]; 899 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 900 if (sctx.newshift) break; 901 pv[nbdiag] = sctx.pv; 902 } 903 904 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 905 /* 906 * if no shift in this attempt & shifting & started shifting & can refine, 907 * then try lower shift 908 */ 909 sctx.shift_hi = sctx.shift_fraction; 910 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 911 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 912 sctx.newshift = PETSC_TRUE; 913 sctx.nshift++; 914 } 915 } while (sctx.newshift); 916 917 /* invert diagonal entries for simpler triangular solves */ 918 for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]]; 919 920 PetscCall(PetscFree(rtmp)); 921 PetscCall(ISRestoreIndices(isicol, &ic)); 922 PetscCall(ISRestoreIndices(isrow, &r)); 923 924 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 925 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 926 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 927 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 928 929 A->assembled = PETSC_TRUE; 930 A->preallocated = PETSC_TRUE; 931 932 PetscCall(PetscLogFlops(A->cmap->n)); 933 if (sctx.nshift) { 934 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 935 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 936 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 937 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 938 } 939 } 940 PetscFunctionReturn(PETSC_SUCCESS); 941 } 942 943 /* ----------------------------------------------------------- */ 944 PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info) 945 { 946 Mat C; 947 948 PetscFunctionBegin; 949 PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C)); 950 PetscCall(MatLUFactorSymbolic(C, A, row, col, info)); 951 PetscCall(MatLUFactorNumeric(C, A, info)); 952 953 A->ops->solve = C->ops->solve; 954 A->ops->solvetranspose = C->ops->solvetranspose; 955 956 PetscCall(MatHeaderMerge(A, &C)); 957 PetscFunctionReturn(PETSC_SUCCESS); 958 } 959 /* ----------------------------------------------------------- */ 960 961 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 962 { 963 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 964 IS iscol = a->col, isrow = a->row; 965 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 966 PetscInt nz; 967 const PetscInt *rout, *cout, *r, *c; 968 PetscScalar *x, *tmp, *tmps, sum; 969 const PetscScalar *b; 970 const MatScalar *aa = a->a, *v; 971 972 PetscFunctionBegin; 973 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 974 975 PetscCall(VecGetArrayRead(bb, &b)); 976 PetscCall(VecGetArrayWrite(xx, &x)); 977 tmp = a->solve_work; 978 979 PetscCall(ISGetIndices(isrow, &rout)); 980 r = rout; 981 PetscCall(ISGetIndices(iscol, &cout)); 982 c = cout + (n - 1); 983 984 /* forward solve the lower triangular */ 985 tmp[0] = b[*r++]; 986 tmps = tmp; 987 for (i = 1; i < n; i++) { 988 v = aa + ai[i]; 989 vi = aj + ai[i]; 990 nz = a->diag[i] - ai[i]; 991 sum = b[*r++]; 992 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 993 tmp[i] = sum; 994 } 995 996 /* backward solve the upper triangular */ 997 for (i = n - 1; i >= 0; i--) { 998 v = aa + a->diag[i] + 1; 999 vi = aj + a->diag[i] + 1; 1000 nz = ai[i + 1] - a->diag[i] - 1; 1001 sum = tmp[i]; 1002 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1003 x[*c--] = tmp[i] = sum * aa[a->diag[i]]; 1004 } 1005 1006 PetscCall(ISRestoreIndices(isrow, &rout)); 1007 PetscCall(ISRestoreIndices(iscol, &cout)); 1008 PetscCall(VecRestoreArrayRead(bb, &b)); 1009 PetscCall(VecRestoreArrayWrite(xx, &x)); 1010 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1011 PetscFunctionReturn(PETSC_SUCCESS); 1012 } 1013 1014 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X) 1015 { 1016 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1017 IS iscol = a->col, isrow = a->row; 1018 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 1019 PetscInt nz, neq, ldb, ldx; 1020 const PetscInt *rout, *cout, *r, *c; 1021 PetscScalar *x, *tmp = a->solve_work, *tmps, sum; 1022 const PetscScalar *b, *aa = a->a, *v; 1023 PetscBool isdense; 1024 1025 PetscFunctionBegin; 1026 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1027 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 1028 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1029 if (X != B) { 1030 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 1031 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1032 } 1033 PetscCall(MatDenseGetArrayRead(B, &b)); 1034 PetscCall(MatDenseGetLDA(B, &ldb)); 1035 PetscCall(MatDenseGetArray(X, &x)); 1036 PetscCall(MatDenseGetLDA(X, &ldx)); 1037 PetscCall(ISGetIndices(isrow, &rout)); 1038 r = rout; 1039 PetscCall(ISGetIndices(iscol, &cout)); 1040 c = cout; 1041 for (neq = 0; neq < B->cmap->n; neq++) { 1042 /* forward solve the lower triangular */ 1043 tmp[0] = b[r[0]]; 1044 tmps = tmp; 1045 for (i = 1; i < n; i++) { 1046 v = aa + ai[i]; 1047 vi = aj + ai[i]; 1048 nz = a->diag[i] - ai[i]; 1049 sum = b[r[i]]; 1050 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1051 tmp[i] = sum; 1052 } 1053 /* backward solve the upper triangular */ 1054 for (i = n - 1; i >= 0; i--) { 1055 v = aa + a->diag[i] + 1; 1056 vi = aj + a->diag[i] + 1; 1057 nz = ai[i + 1] - a->diag[i] - 1; 1058 sum = tmp[i]; 1059 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1060 x[c[i]] = tmp[i] = sum * aa[a->diag[i]]; 1061 } 1062 b += ldb; 1063 x += ldx; 1064 } 1065 PetscCall(ISRestoreIndices(isrow, &rout)); 1066 PetscCall(ISRestoreIndices(iscol, &cout)); 1067 PetscCall(MatDenseRestoreArrayRead(B, &b)); 1068 PetscCall(MatDenseRestoreArray(X, &x)); 1069 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 1070 PetscFunctionReturn(PETSC_SUCCESS); 1071 } 1072 1073 PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X) 1074 { 1075 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1076 IS iscol = a->col, isrow = a->row; 1077 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 1078 PetscInt nz, neq, ldb, ldx; 1079 const PetscInt *rout, *cout, *r, *c; 1080 PetscScalar *x, *tmp = a->solve_work, sum; 1081 const PetscScalar *b, *aa = a->a, *v; 1082 PetscBool isdense; 1083 1084 PetscFunctionBegin; 1085 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1086 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 1087 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1088 if (X != B) { 1089 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 1090 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1091 } 1092 PetscCall(MatDenseGetArrayRead(B, &b)); 1093 PetscCall(MatDenseGetLDA(B, &ldb)); 1094 PetscCall(MatDenseGetArray(X, &x)); 1095 PetscCall(MatDenseGetLDA(X, &ldx)); 1096 PetscCall(ISGetIndices(isrow, &rout)); 1097 r = rout; 1098 PetscCall(ISGetIndices(iscol, &cout)); 1099 c = cout; 1100 for (neq = 0; neq < B->cmap->n; neq++) { 1101 /* forward solve the lower triangular */ 1102 tmp[0] = b[r[0]]; 1103 v = aa; 1104 vi = aj; 1105 for (i = 1; i < n; i++) { 1106 nz = ai[i + 1] - ai[i]; 1107 sum = b[r[i]]; 1108 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 1109 tmp[i] = sum; 1110 v += nz; 1111 vi += nz; 1112 } 1113 /* backward solve the upper triangular */ 1114 for (i = n - 1; i >= 0; i--) { 1115 v = aa + adiag[i + 1] + 1; 1116 vi = aj + adiag[i + 1] + 1; 1117 nz = adiag[i] - adiag[i + 1] - 1; 1118 sum = tmp[i]; 1119 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 1120 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 1121 } 1122 b += ldb; 1123 x += ldx; 1124 } 1125 PetscCall(ISRestoreIndices(isrow, &rout)); 1126 PetscCall(ISRestoreIndices(iscol, &cout)); 1127 PetscCall(MatDenseRestoreArrayRead(B, &b)); 1128 PetscCall(MatDenseRestoreArray(X, &x)); 1129 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 1130 PetscFunctionReturn(PETSC_SUCCESS); 1131 } 1132 1133 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx) 1134 { 1135 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1136 IS iscol = a->col, isrow = a->row; 1137 const PetscInt *r, *c, *rout, *cout; 1138 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 1139 PetscInt nz, row; 1140 PetscScalar *x, *tmp, *tmps, sum; 1141 const PetscScalar *b; 1142 const MatScalar *aa = a->a, *v; 1143 1144 PetscFunctionBegin; 1145 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1146 1147 PetscCall(VecGetArrayRead(bb, &b)); 1148 PetscCall(VecGetArrayWrite(xx, &x)); 1149 tmp = a->solve_work; 1150 1151 PetscCall(ISGetIndices(isrow, &rout)); 1152 r = rout; 1153 PetscCall(ISGetIndices(iscol, &cout)); 1154 c = cout + (n - 1); 1155 1156 /* forward solve the lower triangular */ 1157 tmp[0] = b[*r++]; 1158 tmps = tmp; 1159 for (row = 1; row < n; row++) { 1160 i = rout[row]; /* permuted row */ 1161 v = aa + ai[i]; 1162 vi = aj + ai[i]; 1163 nz = a->diag[i] - ai[i]; 1164 sum = b[*r++]; 1165 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1166 tmp[row] = sum; 1167 } 1168 1169 /* backward solve the upper triangular */ 1170 for (row = n - 1; row >= 0; row--) { 1171 i = rout[row]; /* permuted row */ 1172 v = aa + a->diag[i] + 1; 1173 vi = aj + a->diag[i] + 1; 1174 nz = ai[i + 1] - a->diag[i] - 1; 1175 sum = tmp[row]; 1176 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1177 x[*c--] = tmp[row] = sum * aa[a->diag[i]]; 1178 } 1179 1180 PetscCall(ISRestoreIndices(isrow, &rout)); 1181 PetscCall(ISRestoreIndices(iscol, &cout)); 1182 PetscCall(VecRestoreArrayRead(bb, &b)); 1183 PetscCall(VecRestoreArrayWrite(xx, &x)); 1184 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1185 PetscFunctionReturn(PETSC_SUCCESS); 1186 } 1187 1188 /* ----------------------------------------------------------- */ 1189 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1190 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) 1191 { 1192 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1193 PetscInt n = A->rmap->n; 1194 const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag; 1195 PetscScalar *x; 1196 const PetscScalar *b; 1197 const MatScalar *aa = a->a; 1198 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1199 PetscInt adiag_i, i, nz, ai_i; 1200 const PetscInt *vi; 1201 const MatScalar *v; 1202 PetscScalar sum; 1203 #endif 1204 1205 PetscFunctionBegin; 1206 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1207 1208 PetscCall(VecGetArrayRead(bb, &b)); 1209 PetscCall(VecGetArrayWrite(xx, &x)); 1210 1211 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1212 fortransolveaij_(&n, x, ai, aj, adiag, aa, b); 1213 #else 1214 /* forward solve the lower triangular */ 1215 x[0] = b[0]; 1216 for (i = 1; i < n; i++) { 1217 ai_i = ai[i]; 1218 v = aa + ai_i; 1219 vi = aj + ai_i; 1220 nz = adiag[i] - ai_i; 1221 sum = b[i]; 1222 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1223 x[i] = sum; 1224 } 1225 1226 /* backward solve the upper triangular */ 1227 for (i = n - 1; i >= 0; i--) { 1228 adiag_i = adiag[i]; 1229 v = aa + adiag_i + 1; 1230 vi = aj + adiag_i + 1; 1231 nz = ai[i + 1] - adiag_i - 1; 1232 sum = x[i]; 1233 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1234 x[i] = sum * aa[adiag_i]; 1235 } 1236 #endif 1237 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1238 PetscCall(VecRestoreArrayRead(bb, &b)); 1239 PetscCall(VecRestoreArrayWrite(xx, &x)); 1240 PetscFunctionReturn(PETSC_SUCCESS); 1241 } 1242 1243 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx) 1244 { 1245 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1246 IS iscol = a->col, isrow = a->row; 1247 PetscInt i, n = A->rmap->n, j; 1248 PetscInt nz; 1249 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j; 1250 PetscScalar *x, *tmp, sum; 1251 const PetscScalar *b; 1252 const MatScalar *aa = a->a, *v; 1253 1254 PetscFunctionBegin; 1255 if (yy != xx) PetscCall(VecCopy(yy, xx)); 1256 1257 PetscCall(VecGetArrayRead(bb, &b)); 1258 PetscCall(VecGetArray(xx, &x)); 1259 tmp = a->solve_work; 1260 1261 PetscCall(ISGetIndices(isrow, &rout)); 1262 r = rout; 1263 PetscCall(ISGetIndices(iscol, &cout)); 1264 c = cout + (n - 1); 1265 1266 /* forward solve the lower triangular */ 1267 tmp[0] = b[*r++]; 1268 for (i = 1; i < n; i++) { 1269 v = aa + ai[i]; 1270 vi = aj + ai[i]; 1271 nz = a->diag[i] - ai[i]; 1272 sum = b[*r++]; 1273 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1274 tmp[i] = sum; 1275 } 1276 1277 /* backward solve the upper triangular */ 1278 for (i = n - 1; i >= 0; i--) { 1279 v = aa + a->diag[i] + 1; 1280 vi = aj + a->diag[i] + 1; 1281 nz = ai[i + 1] - a->diag[i] - 1; 1282 sum = tmp[i]; 1283 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1284 tmp[i] = sum * aa[a->diag[i]]; 1285 x[*c--] += tmp[i]; 1286 } 1287 1288 PetscCall(ISRestoreIndices(isrow, &rout)); 1289 PetscCall(ISRestoreIndices(iscol, &cout)); 1290 PetscCall(VecRestoreArrayRead(bb, &b)); 1291 PetscCall(VecRestoreArray(xx, &x)); 1292 PetscCall(PetscLogFlops(2.0 * a->nz)); 1293 PetscFunctionReturn(PETSC_SUCCESS); 1294 } 1295 1296 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx) 1297 { 1298 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1299 IS iscol = a->col, isrow = a->row; 1300 PetscInt i, n = A->rmap->n, j; 1301 PetscInt nz; 1302 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 1303 PetscScalar *x, *tmp, sum; 1304 const PetscScalar *b; 1305 const MatScalar *aa = a->a, *v; 1306 1307 PetscFunctionBegin; 1308 if (yy != xx) PetscCall(VecCopy(yy, xx)); 1309 1310 PetscCall(VecGetArrayRead(bb, &b)); 1311 PetscCall(VecGetArray(xx, &x)); 1312 tmp = a->solve_work; 1313 1314 PetscCall(ISGetIndices(isrow, &rout)); 1315 r = rout; 1316 PetscCall(ISGetIndices(iscol, &cout)); 1317 c = cout; 1318 1319 /* forward solve the lower triangular */ 1320 tmp[0] = b[r[0]]; 1321 v = aa; 1322 vi = aj; 1323 for (i = 1; i < n; i++) { 1324 nz = ai[i + 1] - ai[i]; 1325 sum = b[r[i]]; 1326 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1327 tmp[i] = sum; 1328 v += nz; 1329 vi += nz; 1330 } 1331 1332 /* backward solve the upper triangular */ 1333 v = aa + adiag[n - 1]; 1334 vi = aj + adiag[n - 1]; 1335 for (i = n - 1; i >= 0; i--) { 1336 nz = adiag[i] - adiag[i + 1] - 1; 1337 sum = tmp[i]; 1338 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1339 tmp[i] = sum * v[nz]; 1340 x[c[i]] += tmp[i]; 1341 v += nz + 1; 1342 vi += nz + 1; 1343 } 1344 1345 PetscCall(ISRestoreIndices(isrow, &rout)); 1346 PetscCall(ISRestoreIndices(iscol, &cout)); 1347 PetscCall(VecRestoreArrayRead(bb, &b)); 1348 PetscCall(VecRestoreArray(xx, &x)); 1349 PetscCall(PetscLogFlops(2.0 * a->nz)); 1350 PetscFunctionReturn(PETSC_SUCCESS); 1351 } 1352 1353 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 1354 { 1355 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1356 IS iscol = a->col, isrow = a->row; 1357 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 1358 PetscInt i, n = A->rmap->n, j; 1359 PetscInt nz; 1360 PetscScalar *x, *tmp, s1; 1361 const MatScalar *aa = a->a, *v; 1362 const PetscScalar *b; 1363 1364 PetscFunctionBegin; 1365 PetscCall(VecGetArrayRead(bb, &b)); 1366 PetscCall(VecGetArrayWrite(xx, &x)); 1367 tmp = a->solve_work; 1368 1369 PetscCall(ISGetIndices(isrow, &rout)); 1370 r = rout; 1371 PetscCall(ISGetIndices(iscol, &cout)); 1372 c = cout; 1373 1374 /* copy the b into temp work space according to permutation */ 1375 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1376 1377 /* forward solve the U^T */ 1378 for (i = 0; i < n; i++) { 1379 v = aa + diag[i]; 1380 vi = aj + diag[i] + 1; 1381 nz = ai[i + 1] - diag[i] - 1; 1382 s1 = tmp[i]; 1383 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1384 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1385 tmp[i] = s1; 1386 } 1387 1388 /* backward solve the L^T */ 1389 for (i = n - 1; i >= 0; i--) { 1390 v = aa + diag[i] - 1; 1391 vi = aj + diag[i] - 1; 1392 nz = diag[i] - ai[i]; 1393 s1 = tmp[i]; 1394 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1395 } 1396 1397 /* copy tmp into x according to permutation */ 1398 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1399 1400 PetscCall(ISRestoreIndices(isrow, &rout)); 1401 PetscCall(ISRestoreIndices(iscol, &cout)); 1402 PetscCall(VecRestoreArrayRead(bb, &b)); 1403 PetscCall(VecRestoreArrayWrite(xx, &x)); 1404 1405 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1406 PetscFunctionReturn(PETSC_SUCCESS); 1407 } 1408 1409 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx) 1410 { 1411 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1412 IS iscol = a->col, isrow = a->row; 1413 const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1414 PetscInt i, n = A->rmap->n, j; 1415 PetscInt nz; 1416 PetscScalar *x, *tmp, s1; 1417 const MatScalar *aa = a->a, *v; 1418 const PetscScalar *b; 1419 1420 PetscFunctionBegin; 1421 PetscCall(VecGetArrayRead(bb, &b)); 1422 PetscCall(VecGetArrayWrite(xx, &x)); 1423 tmp = a->solve_work; 1424 1425 PetscCall(ISGetIndices(isrow, &rout)); 1426 r = rout; 1427 PetscCall(ISGetIndices(iscol, &cout)); 1428 c = cout; 1429 1430 /* copy the b into temp work space according to permutation */ 1431 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1432 1433 /* forward solve the U^T */ 1434 for (i = 0; i < n; i++) { 1435 v = aa + adiag[i + 1] + 1; 1436 vi = aj + adiag[i + 1] + 1; 1437 nz = adiag[i] - adiag[i + 1] - 1; 1438 s1 = tmp[i]; 1439 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1440 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1441 tmp[i] = s1; 1442 } 1443 1444 /* backward solve the L^T */ 1445 for (i = n - 1; i >= 0; i--) { 1446 v = aa + ai[i]; 1447 vi = aj + ai[i]; 1448 nz = ai[i + 1] - ai[i]; 1449 s1 = tmp[i]; 1450 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1451 } 1452 1453 /* copy tmp into x according to permutation */ 1454 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1455 1456 PetscCall(ISRestoreIndices(isrow, &rout)); 1457 PetscCall(ISRestoreIndices(iscol, &cout)); 1458 PetscCall(VecRestoreArrayRead(bb, &b)); 1459 PetscCall(VecRestoreArrayWrite(xx, &x)); 1460 1461 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1462 PetscFunctionReturn(PETSC_SUCCESS); 1463 } 1464 1465 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx) 1466 { 1467 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1468 IS iscol = a->col, isrow = a->row; 1469 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 1470 PetscInt i, n = A->rmap->n, j; 1471 PetscInt nz; 1472 PetscScalar *x, *tmp, s1; 1473 const MatScalar *aa = a->a, *v; 1474 const PetscScalar *b; 1475 1476 PetscFunctionBegin; 1477 if (zz != xx) PetscCall(VecCopy(zz, xx)); 1478 PetscCall(VecGetArrayRead(bb, &b)); 1479 PetscCall(VecGetArray(xx, &x)); 1480 tmp = a->solve_work; 1481 1482 PetscCall(ISGetIndices(isrow, &rout)); 1483 r = rout; 1484 PetscCall(ISGetIndices(iscol, &cout)); 1485 c = cout; 1486 1487 /* copy the b into temp work space according to permutation */ 1488 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1489 1490 /* forward solve the U^T */ 1491 for (i = 0; i < n; i++) { 1492 v = aa + diag[i]; 1493 vi = aj + diag[i] + 1; 1494 nz = ai[i + 1] - diag[i] - 1; 1495 s1 = tmp[i]; 1496 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1497 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1498 tmp[i] = s1; 1499 } 1500 1501 /* backward solve the L^T */ 1502 for (i = n - 1; i >= 0; i--) { 1503 v = aa + diag[i] - 1; 1504 vi = aj + diag[i] - 1; 1505 nz = diag[i] - ai[i]; 1506 s1 = tmp[i]; 1507 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1508 } 1509 1510 /* copy tmp into x according to permutation */ 1511 for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1512 1513 PetscCall(ISRestoreIndices(isrow, &rout)); 1514 PetscCall(ISRestoreIndices(iscol, &cout)); 1515 PetscCall(VecRestoreArrayRead(bb, &b)); 1516 PetscCall(VecRestoreArray(xx, &x)); 1517 1518 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1519 PetscFunctionReturn(PETSC_SUCCESS); 1520 } 1521 1522 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx) 1523 { 1524 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1525 IS iscol = a->col, isrow = a->row; 1526 const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1527 PetscInt i, n = A->rmap->n, j; 1528 PetscInt nz; 1529 PetscScalar *x, *tmp, s1; 1530 const MatScalar *aa = a->a, *v; 1531 const PetscScalar *b; 1532 1533 PetscFunctionBegin; 1534 if (zz != xx) PetscCall(VecCopy(zz, xx)); 1535 PetscCall(VecGetArrayRead(bb, &b)); 1536 PetscCall(VecGetArray(xx, &x)); 1537 tmp = a->solve_work; 1538 1539 PetscCall(ISGetIndices(isrow, &rout)); 1540 r = rout; 1541 PetscCall(ISGetIndices(iscol, &cout)); 1542 c = cout; 1543 1544 /* copy the b into temp work space according to permutation */ 1545 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1546 1547 /* forward solve the U^T */ 1548 for (i = 0; i < n; i++) { 1549 v = aa + adiag[i + 1] + 1; 1550 vi = aj + adiag[i + 1] + 1; 1551 nz = adiag[i] - adiag[i + 1] - 1; 1552 s1 = tmp[i]; 1553 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1554 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1555 tmp[i] = s1; 1556 } 1557 1558 /* backward solve the L^T */ 1559 for (i = n - 1; i >= 0; i--) { 1560 v = aa + ai[i]; 1561 vi = aj + ai[i]; 1562 nz = ai[i + 1] - ai[i]; 1563 s1 = tmp[i]; 1564 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1565 } 1566 1567 /* copy tmp into x according to permutation */ 1568 for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1569 1570 PetscCall(ISRestoreIndices(isrow, &rout)); 1571 PetscCall(ISRestoreIndices(iscol, &cout)); 1572 PetscCall(VecRestoreArrayRead(bb, &b)); 1573 PetscCall(VecRestoreArray(xx, &x)); 1574 1575 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1576 PetscFunctionReturn(PETSC_SUCCESS); 1577 } 1578 1579 /* ----------------------------------------------------------------*/ 1580 1581 /* 1582 ilu() under revised new data structure. 1583 Factored arrays bj and ba are stored as 1584 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1585 1586 bi=fact->i is an array of size n+1, in which 1587 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1588 bi[n]: points to L(n-1,n-1)+1 1589 1590 bdiag=fact->diag is an array of size n+1,in which 1591 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1592 bdiag[n]: points to entry of U(n-1,0)-1 1593 1594 U(i,:) contains bdiag[i] as its last entry, i.e., 1595 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1596 */ 1597 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1598 { 1599 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1600 const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag; 1601 PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag; 1602 IS isicol; 1603 1604 PetscFunctionBegin; 1605 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1606 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE)); 1607 b = (Mat_SeqAIJ *)(fact)->data; 1608 1609 /* allocate matrix arrays for new data structure */ 1610 PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i)); 1611 1612 b->singlemalloc = PETSC_TRUE; 1613 if (!b->diag) { PetscCall(PetscMalloc1(n + 1, &b->diag)); } 1614 bdiag = b->diag; 1615 1616 if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n])); 1617 1618 /* set bi and bj with new data structure */ 1619 bi = b->i; 1620 bj = b->j; 1621 1622 /* L part */ 1623 bi[0] = 0; 1624 for (i = 0; i < n; i++) { 1625 nz = adiag[i] - ai[i]; 1626 bi[i + 1] = bi[i] + nz; 1627 aj = a->j + ai[i]; 1628 for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1629 } 1630 1631 /* U part */ 1632 bdiag[n] = bi[n] - 1; 1633 for (i = n - 1; i >= 0; i--) { 1634 nz = ai[i + 1] - adiag[i] - 1; 1635 aj = a->j + adiag[i] + 1; 1636 for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1637 /* diag[i] */ 1638 bj[k++] = i; 1639 bdiag[i] = bdiag[i + 1] + nz + 1; 1640 } 1641 1642 fact->factortype = MAT_FACTOR_ILU; 1643 fact->info.factor_mallocs = 0; 1644 fact->info.fill_ratio_given = info->fill; 1645 fact->info.fill_ratio_needed = 1.0; 1646 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1647 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1648 1649 b = (Mat_SeqAIJ *)(fact)->data; 1650 b->row = isrow; 1651 b->col = iscol; 1652 b->icol = isicol; 1653 PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work)); 1654 PetscCall(PetscObjectReference((PetscObject)isrow)); 1655 PetscCall(PetscObjectReference((PetscObject)iscol)); 1656 PetscFunctionReturn(PETSC_SUCCESS); 1657 } 1658 1659 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1660 { 1661 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1662 IS isicol; 1663 const PetscInt *r, *ic; 1664 PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1665 PetscInt *bi, *cols, nnz, *cols_lvl; 1666 PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1667 PetscInt i, levels, diagonal_fill; 1668 PetscBool col_identity, row_identity, missing; 1669 PetscReal f; 1670 PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1671 PetscBT lnkbt; 1672 PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 1673 PetscFreeSpaceList free_space = NULL, current_space = NULL; 1674 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1675 1676 PetscFunctionBegin; 1677 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 1678 PetscCall(MatMissingDiagonal(A, &missing, &i)); 1679 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1680 1681 levels = (PetscInt)info->levels; 1682 PetscCall(ISIdentity(isrow, &row_identity)); 1683 PetscCall(ISIdentity(iscol, &col_identity)); 1684 if (!levels && row_identity && col_identity) { 1685 /* special case: ilu(0) with natural ordering */ 1686 PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info)); 1687 if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1688 PetscFunctionReturn(PETSC_SUCCESS); 1689 } 1690 1691 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1692 PetscCall(ISGetIndices(isrow, &r)); 1693 PetscCall(ISGetIndices(isicol, &ic)); 1694 1695 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1696 PetscCall(PetscMalloc1(n + 1, &bi)); 1697 PetscCall(PetscMalloc1(n + 1, &bdiag)); 1698 bi[0] = bdiag[0] = 0; 1699 PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1700 1701 /* create a linked list for storing column indices of the active row */ 1702 nlnk = n + 1; 1703 PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1704 1705 /* initial FreeSpace size is f*(ai[n]+1) */ 1706 f = info->fill; 1707 diagonal_fill = (PetscInt)info->diagonal_fill; 1708 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1709 current_space = free_space; 1710 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1711 current_space_lvl = free_space_lvl; 1712 for (i = 0; i < n; i++) { 1713 nzi = 0; 1714 /* copy current row into linked list */ 1715 nnz = ai[r[i] + 1] - ai[r[i]]; 1716 PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1717 cols = aj + ai[r[i]]; 1718 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1719 PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1720 nzi += nlnk; 1721 1722 /* make sure diagonal entry is included */ 1723 if (diagonal_fill && lnk[i] == -1) { 1724 fm = n; 1725 while (lnk[fm] < i) fm = lnk[fm]; 1726 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1727 lnk[fm] = i; 1728 lnk_lvl[i] = 0; 1729 nzi++; 1730 dcount++; 1731 } 1732 1733 /* add pivot rows into the active row */ 1734 nzbd = 0; 1735 prow = lnk[n]; 1736 while (prow < i) { 1737 nnz = bdiag[prow]; 1738 cols = bj_ptr[prow] + nnz + 1; 1739 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1740 nnz = bi[prow + 1] - bi[prow] - nnz - 1; 1741 PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1742 nzi += nlnk; 1743 prow = lnk[prow]; 1744 nzbd++; 1745 } 1746 bdiag[i] = nzbd; 1747 bi[i + 1] = bi[i] + nzi; 1748 /* if free space is not available, make more free space */ 1749 if (current_space->local_remaining < nzi) { 1750 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */ 1751 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 1752 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1753 reallocs++; 1754 } 1755 1756 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1757 PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 1758 bj_ptr[i] = current_space->array; 1759 bjlvl_ptr[i] = current_space_lvl->array; 1760 1761 /* make sure the active row i has diagonal entry */ 1762 PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 1763 1764 current_space->array += nzi; 1765 current_space->local_used += nzi; 1766 current_space->local_remaining -= nzi; 1767 current_space_lvl->array += nzi; 1768 current_space_lvl->local_used += nzi; 1769 current_space_lvl->local_remaining -= nzi; 1770 } 1771 1772 PetscCall(ISRestoreIndices(isrow, &r)); 1773 PetscCall(ISRestoreIndices(isicol, &ic)); 1774 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1775 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 1776 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 1777 1778 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 1779 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 1780 PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 1781 1782 #if defined(PETSC_USE_INFO) 1783 { 1784 PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1785 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 1786 PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 1787 PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 1788 PetscCall(PetscInfo(A, "for best performance.\n")); 1789 if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 1790 } 1791 #endif 1792 /* put together the new matrix */ 1793 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 1794 b = (Mat_SeqAIJ *)(fact)->data; 1795 1796 b->free_a = PETSC_TRUE; 1797 b->free_ij = PETSC_TRUE; 1798 b->singlemalloc = PETSC_FALSE; 1799 1800 PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a)); 1801 1802 b->j = bj; 1803 b->i = bi; 1804 b->diag = bdiag; 1805 b->ilen = NULL; 1806 b->imax = NULL; 1807 b->row = isrow; 1808 b->col = iscol; 1809 PetscCall(PetscObjectReference((PetscObject)isrow)); 1810 PetscCall(PetscObjectReference((PetscObject)iscol)); 1811 b->icol = isicol; 1812 1813 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 1814 /* In b structure: Free imax, ilen, old a, old j. 1815 Allocate bdiag, solve_work, new a, new j */ 1816 b->maxnz = b->nz = bdiag[0] + 1; 1817 1818 (fact)->info.factor_mallocs = reallocs; 1819 (fact)->info.fill_ratio_given = f; 1820 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1821 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1822 if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1823 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1824 PetscFunctionReturn(PETSC_SUCCESS); 1825 } 1826 1827 #if 0 1828 // unused 1829 static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1830 { 1831 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1832 IS isicol; 1833 const PetscInt *r, *ic; 1834 PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1835 PetscInt *bi, *cols, nnz, *cols_lvl; 1836 PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1837 PetscInt i, levels, diagonal_fill; 1838 PetscBool col_identity, row_identity; 1839 PetscReal f; 1840 PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1841 PetscBT lnkbt; 1842 PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 1843 PetscFreeSpaceList free_space = NULL, current_space = NULL; 1844 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1845 PetscBool missing; 1846 1847 PetscFunctionBegin; 1848 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 1849 PetscCall(MatMissingDiagonal(A, &missing, &i)); 1850 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1851 1852 f = info->fill; 1853 levels = (PetscInt)info->levels; 1854 diagonal_fill = (PetscInt)info->diagonal_fill; 1855 1856 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1857 1858 PetscCall(ISIdentity(isrow, &row_identity)); 1859 PetscCall(ISIdentity(iscol, &col_identity)); 1860 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1861 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE)); 1862 1863 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1864 if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1865 fact->factortype = MAT_FACTOR_ILU; 1866 (fact)->info.factor_mallocs = 0; 1867 (fact)->info.fill_ratio_given = info->fill; 1868 (fact)->info.fill_ratio_needed = 1.0; 1869 1870 b = (Mat_SeqAIJ *)(fact)->data; 1871 b->row = isrow; 1872 b->col = iscol; 1873 b->icol = isicol; 1874 PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work)); 1875 PetscCall(PetscObjectReference((PetscObject)isrow)); 1876 PetscCall(PetscObjectReference((PetscObject)iscol)); 1877 PetscFunctionReturn(PETSC_SUCCESS); 1878 } 1879 1880 PetscCall(ISGetIndices(isrow, &r)); 1881 PetscCall(ISGetIndices(isicol, &ic)); 1882 1883 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1884 PetscCall(PetscMalloc1(n + 1, &bi)); 1885 PetscCall(PetscMalloc1(n + 1, &bdiag)); 1886 bi[0] = bdiag[0] = 0; 1887 1888 PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1889 1890 /* create a linked list for storing column indices of the active row */ 1891 nlnk = n + 1; 1892 PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1893 1894 /* initial FreeSpace size is f*(ai[n]+1) */ 1895 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1896 current_space = free_space; 1897 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1898 current_space_lvl = free_space_lvl; 1899 1900 for (i = 0; i < n; i++) { 1901 nzi = 0; 1902 /* copy current row into linked list */ 1903 nnz = ai[r[i] + 1] - ai[r[i]]; 1904 PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1905 cols = aj + ai[r[i]]; 1906 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1907 PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1908 nzi += nlnk; 1909 1910 /* make sure diagonal entry is included */ 1911 if (diagonal_fill && lnk[i] == -1) { 1912 fm = n; 1913 while (lnk[fm] < i) fm = lnk[fm]; 1914 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1915 lnk[fm] = i; 1916 lnk_lvl[i] = 0; 1917 nzi++; 1918 dcount++; 1919 } 1920 1921 /* add pivot rows into the active row */ 1922 nzbd = 0; 1923 prow = lnk[n]; 1924 while (prow < i) { 1925 nnz = bdiag[prow]; 1926 cols = bj_ptr[prow] + nnz + 1; 1927 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1928 nnz = bi[prow + 1] - bi[prow] - nnz - 1; 1929 PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1930 nzi += nlnk; 1931 prow = lnk[prow]; 1932 nzbd++; 1933 } 1934 bdiag[i] = nzbd; 1935 bi[i + 1] = bi[i] + nzi; 1936 1937 /* if free space is not available, make more free space */ 1938 if (current_space->local_remaining < nzi) { 1939 nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */ 1940 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 1941 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1942 reallocs++; 1943 } 1944 1945 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1946 PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 1947 bj_ptr[i] = current_space->array; 1948 bjlvl_ptr[i] = current_space_lvl->array; 1949 1950 /* make sure the active row i has diagonal entry */ 1951 PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 1952 1953 current_space->array += nzi; 1954 current_space->local_used += nzi; 1955 current_space->local_remaining -= nzi; 1956 current_space_lvl->array += nzi; 1957 current_space_lvl->local_used += nzi; 1958 current_space_lvl->local_remaining -= nzi; 1959 } 1960 1961 PetscCall(ISRestoreIndices(isrow, &r)); 1962 PetscCall(ISRestoreIndices(isicol, &ic)); 1963 1964 /* destroy list of free space and other temporary arrays */ 1965 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 1966 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */ 1967 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 1968 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 1969 PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 1970 1971 #if defined(PETSC_USE_INFO) 1972 { 1973 PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 1974 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 1975 PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 1976 PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 1977 PetscCall(PetscInfo(A, "for best performance.\n")); 1978 if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 1979 } 1980 #endif 1981 1982 /* put together the new matrix */ 1983 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 1984 b = (Mat_SeqAIJ *)(fact)->data; 1985 1986 b->free_a = PETSC_TRUE; 1987 b->free_ij = PETSC_TRUE; 1988 b->singlemalloc = PETSC_FALSE; 1989 1990 PetscCall(PetscMalloc1(bi[n], &b->a)); 1991 b->j = bj; 1992 b->i = bi; 1993 for (i = 0; i < n; i++) bdiag[i] += bi[i]; 1994 b->diag = bdiag; 1995 b->ilen = NULL; 1996 b->imax = NULL; 1997 b->row = isrow; 1998 b->col = iscol; 1999 PetscCall(PetscObjectReference((PetscObject)isrow)); 2000 PetscCall(PetscObjectReference((PetscObject)iscol)); 2001 b->icol = isicol; 2002 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 2003 /* In b structure: Free imax, ilen, old a, old j. 2004 Allocate bdiag, solve_work, new a, new j */ 2005 b->maxnz = b->nz = bi[n]; 2006 2007 (fact)->info.factor_mallocs = reallocs; 2008 (fact)->info.fill_ratio_given = f; 2009 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 2010 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2011 if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2012 PetscFunctionReturn(PETSC_SUCCESS); 2013 } 2014 #endif 2015 2016 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 2017 { 2018 Mat C = B; 2019 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2020 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 2021 IS ip = b->row, iip = b->icol; 2022 const PetscInt *rip, *riip; 2023 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp; 2024 PetscInt *ai = a->i, *aj = a->j; 2025 PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz; 2026 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 2027 PetscBool perm_identity; 2028 FactorShiftCtx sctx; 2029 PetscReal rs; 2030 MatScalar d, *v; 2031 2032 PetscFunctionBegin; 2033 /* MatPivotSetUp(): initialize shift context sctx */ 2034 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 2035 2036 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2037 sctx.shift_top = info->zeropivot; 2038 for (i = 0; i < mbs; i++) { 2039 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2040 d = (aa)[a->diag[i]]; 2041 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2042 v = aa + ai[i]; 2043 nz = ai[i + 1] - ai[i]; 2044 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 2045 if (rs > sctx.shift_top) sctx.shift_top = rs; 2046 } 2047 sctx.shift_top *= 1.1; 2048 sctx.nshift_max = 5; 2049 sctx.shift_lo = 0.; 2050 sctx.shift_hi = 1.; 2051 } 2052 2053 PetscCall(ISGetIndices(ip, &rip)); 2054 PetscCall(ISGetIndices(iip, &riip)); 2055 2056 /* allocate working arrays 2057 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2058 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2059 */ 2060 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r)); 2061 2062 do { 2063 sctx.newshift = PETSC_FALSE; 2064 2065 for (i = 0; i < mbs; i++) c2r[i] = mbs; 2066 if (mbs) il[0] = 0; 2067 2068 for (k = 0; k < mbs; k++) { 2069 /* zero rtmp */ 2070 nz = bi[k + 1] - bi[k]; 2071 bjtmp = bj + bi[k]; 2072 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 2073 2074 /* load in initial unfactored row */ 2075 bval = ba + bi[k]; 2076 jmin = ai[rip[k]]; 2077 jmax = ai[rip[k] + 1]; 2078 for (j = jmin; j < jmax; j++) { 2079 col = riip[aj[j]]; 2080 if (col >= k) { /* only take upper triangular entry */ 2081 rtmp[col] = aa[j]; 2082 *bval++ = 0.0; /* for in-place factorization */ 2083 } 2084 } 2085 /* shift the diagonal of the matrix: ZeropivotApply() */ 2086 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2087 2088 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2089 dk = rtmp[k]; 2090 i = c2r[k]; /* first row to be added to k_th row */ 2091 2092 while (i < k) { 2093 nexti = c2r[i]; /* next row to be added to k_th row */ 2094 2095 /* compute multiplier, update diag(k) and U(i,k) */ 2096 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2097 uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */ 2098 dk += uikdi * ba[ili]; /* update diag[k] */ 2099 ba[ili] = uikdi; /* -U(i,k) */ 2100 2101 /* add multiple of row i to k-th row */ 2102 jmin = ili + 1; 2103 jmax = bi[i + 1]; 2104 if (jmin < jmax) { 2105 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 2106 /* update il and c2r for row i */ 2107 il[i] = jmin; 2108 j = bj[jmin]; 2109 c2r[i] = c2r[j]; 2110 c2r[j] = i; 2111 } 2112 i = nexti; 2113 } 2114 2115 /* copy data into U(k,:) */ 2116 rs = 0.0; 2117 jmin = bi[k]; 2118 jmax = bi[k + 1] - 1; 2119 if (jmin < jmax) { 2120 for (j = jmin; j < jmax; j++) { 2121 col = bj[j]; 2122 ba[j] = rtmp[col]; 2123 rs += PetscAbsScalar(ba[j]); 2124 } 2125 /* add the k-th row into il and c2r */ 2126 il[k] = jmin; 2127 i = bj[jmin]; 2128 c2r[k] = c2r[i]; 2129 c2r[i] = k; 2130 } 2131 2132 /* MatPivotCheck() */ 2133 sctx.rs = rs; 2134 sctx.pv = dk; 2135 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 2136 if (sctx.newshift) break; 2137 dk = sctx.pv; 2138 2139 ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */ 2140 } 2141 } while (sctx.newshift); 2142 2143 PetscCall(PetscFree3(rtmp, il, c2r)); 2144 PetscCall(ISRestoreIndices(ip, &rip)); 2145 PetscCall(ISRestoreIndices(iip, &riip)); 2146 2147 PetscCall(ISIdentity(ip, &perm_identity)); 2148 if (perm_identity) { 2149 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2150 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2151 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2152 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2153 } else { 2154 B->ops->solve = MatSolve_SeqSBAIJ_1; 2155 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2156 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2157 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2158 } 2159 2160 C->assembled = PETSC_TRUE; 2161 C->preallocated = PETSC_TRUE; 2162 2163 PetscCall(PetscLogFlops(C->rmap->n)); 2164 2165 /* MatPivotView() */ 2166 if (sctx.nshift) { 2167 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2168 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 2169 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2170 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2171 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2172 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 2173 } 2174 } 2175 PetscFunctionReturn(PETSC_SUCCESS); 2176 } 2177 2178 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 2179 { 2180 Mat C = B; 2181 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2182 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 2183 IS ip = b->row, iip = b->icol; 2184 const PetscInt *rip, *riip; 2185 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp; 2186 PetscInt *ai = a->i, *aj = a->j; 2187 PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 2188 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 2189 PetscBool perm_identity; 2190 FactorShiftCtx sctx; 2191 PetscReal rs; 2192 MatScalar d, *v; 2193 2194 PetscFunctionBegin; 2195 /* MatPivotSetUp(): initialize shift context sctx */ 2196 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 2197 2198 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2199 sctx.shift_top = info->zeropivot; 2200 for (i = 0; i < mbs; i++) { 2201 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2202 d = (aa)[a->diag[i]]; 2203 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2204 v = aa + ai[i]; 2205 nz = ai[i + 1] - ai[i]; 2206 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 2207 if (rs > sctx.shift_top) sctx.shift_top = rs; 2208 } 2209 sctx.shift_top *= 1.1; 2210 sctx.nshift_max = 5; 2211 sctx.shift_lo = 0.; 2212 sctx.shift_hi = 1.; 2213 } 2214 2215 PetscCall(ISGetIndices(ip, &rip)); 2216 PetscCall(ISGetIndices(iip, &riip)); 2217 2218 /* initialization */ 2219 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 2220 2221 do { 2222 sctx.newshift = PETSC_FALSE; 2223 2224 for (i = 0; i < mbs; i++) jl[i] = mbs; 2225 il[0] = 0; 2226 2227 for (k = 0; k < mbs; k++) { 2228 /* zero rtmp */ 2229 nz = bi[k + 1] - bi[k]; 2230 bjtmp = bj + bi[k]; 2231 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 2232 2233 bval = ba + bi[k]; 2234 /* initialize k-th row by the perm[k]-th row of A */ 2235 jmin = ai[rip[k]]; 2236 jmax = ai[rip[k] + 1]; 2237 for (j = jmin; j < jmax; j++) { 2238 col = riip[aj[j]]; 2239 if (col >= k) { /* only take upper triangular entry */ 2240 rtmp[col] = aa[j]; 2241 *bval++ = 0.0; /* for in-place factorization */ 2242 } 2243 } 2244 /* shift the diagonal of the matrix */ 2245 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2246 2247 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2248 dk = rtmp[k]; 2249 i = jl[k]; /* first row to be added to k_th row */ 2250 2251 while (i < k) { 2252 nexti = jl[i]; /* next row to be added to k_th row */ 2253 2254 /* compute multiplier, update diag(k) and U(i,k) */ 2255 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2256 uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 2257 dk += uikdi * ba[ili]; 2258 ba[ili] = uikdi; /* -U(i,k) */ 2259 2260 /* add multiple of row i to k-th row */ 2261 jmin = ili + 1; 2262 jmax = bi[i + 1]; 2263 if (jmin < jmax) { 2264 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 2265 /* update il and jl for row i */ 2266 il[i] = jmin; 2267 j = bj[jmin]; 2268 jl[i] = jl[j]; 2269 jl[j] = i; 2270 } 2271 i = nexti; 2272 } 2273 2274 /* shift the diagonals when zero pivot is detected */ 2275 /* compute rs=sum of abs(off-diagonal) */ 2276 rs = 0.0; 2277 jmin = bi[k] + 1; 2278 nz = bi[k + 1] - jmin; 2279 bcol = bj + jmin; 2280 for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]); 2281 2282 sctx.rs = rs; 2283 sctx.pv = dk; 2284 PetscCall(MatPivotCheck(B, A, info, &sctx, k)); 2285 if (sctx.newshift) break; 2286 dk = sctx.pv; 2287 2288 /* copy data into U(k,:) */ 2289 ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 2290 jmin = bi[k] + 1; 2291 jmax = bi[k + 1]; 2292 if (jmin < jmax) { 2293 for (j = jmin; j < jmax; j++) { 2294 col = bj[j]; 2295 ba[j] = rtmp[col]; 2296 } 2297 /* add the k-th row into il and jl */ 2298 il[k] = jmin; 2299 i = bj[jmin]; 2300 jl[k] = jl[i]; 2301 jl[i] = k; 2302 } 2303 } 2304 } while (sctx.newshift); 2305 2306 PetscCall(PetscFree3(rtmp, il, jl)); 2307 PetscCall(ISRestoreIndices(ip, &rip)); 2308 PetscCall(ISRestoreIndices(iip, &riip)); 2309 2310 PetscCall(ISIdentity(ip, &perm_identity)); 2311 if (perm_identity) { 2312 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2313 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2314 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2315 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2316 } else { 2317 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2318 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2319 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2320 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2321 } 2322 2323 C->assembled = PETSC_TRUE; 2324 C->preallocated = PETSC_TRUE; 2325 2326 PetscCall(PetscLogFlops(C->rmap->n)); 2327 if (sctx.nshift) { 2328 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2329 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2330 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2331 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2332 } 2333 } 2334 PetscFunctionReturn(PETSC_SUCCESS); 2335 } 2336 2337 /* 2338 icc() under revised new data structure. 2339 Factored arrays bj and ba are stored as 2340 U(0,:),...,U(i,:),U(n-1,:) 2341 2342 ui=fact->i is an array of size n+1, in which 2343 ui+ 2344 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2345 ui[n]: points to U(n-1,n-1)+1 2346 2347 udiag=fact->diag is an array of size n,in which 2348 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2349 2350 U(i,:) contains udiag[i] as its last entry, i.e., 2351 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2352 */ 2353 2354 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2355 { 2356 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2357 Mat_SeqSBAIJ *b; 2358 PetscBool perm_identity, missing; 2359 PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 2360 const PetscInt *rip, *riip; 2361 PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 2362 PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 2363 PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2364 PetscReal fill = info->fill, levels = info->levels; 2365 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2366 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 2367 PetscBT lnkbt; 2368 IS iperm; 2369 2370 PetscFunctionBegin; 2371 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2372 PetscCall(MatMissingDiagonal(A, &missing, &d)); 2373 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 2374 PetscCall(ISIdentity(perm, &perm_identity)); 2375 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2376 2377 PetscCall(PetscMalloc1(am + 1, &ui)); 2378 PetscCall(PetscMalloc1(am + 1, &udiag)); 2379 ui[0] = 0; 2380 2381 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2382 if (!levels && perm_identity) { 2383 for (i = 0; i < am; i++) { 2384 ncols = ai[i + 1] - a->diag[i]; 2385 ui[i + 1] = ui[i] + ncols; 2386 udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */ 2387 } 2388 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2389 cols = uj; 2390 for (i = 0; i < am; i++) { 2391 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2392 ncols = ai[i + 1] - a->diag[i] - 1; 2393 for (j = 0; j < ncols; j++) *cols++ = aj[j]; 2394 *cols++ = i; /* diagonal is located as the last entry of U(i,:) */ 2395 } 2396 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2397 PetscCall(ISGetIndices(iperm, &riip)); 2398 PetscCall(ISGetIndices(perm, &rip)); 2399 2400 /* initialization */ 2401 PetscCall(PetscMalloc1(am + 1, &ajtmp)); 2402 2403 /* jl: linked list for storing indices of the pivot rows 2404 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2405 PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 2406 for (i = 0; i < am; i++) { 2407 jl[i] = am; 2408 il[i] = 0; 2409 } 2410 2411 /* create and initialize a linked list for storing column indices of the active row k */ 2412 nlnk = am + 1; 2413 PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 2414 2415 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2416 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 2417 current_space = free_space; 2418 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl)); 2419 current_space_lvl = free_space_lvl; 2420 2421 for (k = 0; k < am; k++) { /* for each active row k */ 2422 /* initialize lnk by the column indices of row rip[k] of A */ 2423 nzk = 0; 2424 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2425 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2426 ncols_upper = 0; 2427 for (j = 0; j < ncols; j++) { 2428 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2429 if (riip[i] >= k) { /* only take upper triangular entry */ 2430 ajtmp[ncols_upper] = i; 2431 ncols_upper++; 2432 } 2433 } 2434 PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 2435 nzk += nlnk; 2436 2437 /* update lnk by computing fill-in for each pivot row to be merged in */ 2438 prow = jl[k]; /* 1st pivot row */ 2439 2440 while (prow < k) { 2441 nextprow = jl[prow]; 2442 2443 /* merge prow into k-th row */ 2444 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2445 jmax = ui[prow + 1]; 2446 ncols = jmax - jmin; 2447 i = jmin - ui[prow]; 2448 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2449 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2450 j = *(uj - 1); 2451 PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 2452 nzk += nlnk; 2453 2454 /* update il and jl for prow */ 2455 if (jmin < jmax) { 2456 il[prow] = jmin; 2457 j = *cols; 2458 jl[prow] = jl[j]; 2459 jl[j] = prow; 2460 } 2461 prow = nextprow; 2462 } 2463 2464 /* if free space is not available, make more free space */ 2465 if (current_space->local_remaining < nzk) { 2466 i = am - k + 1; /* num of unfactored rows */ 2467 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2468 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2469 PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 2470 reallocs++; 2471 } 2472 2473 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2474 PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 2475 PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2476 2477 /* add the k-th row into il and jl */ 2478 if (nzk > 1) { 2479 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2480 jl[k] = jl[i]; 2481 jl[i] = k; 2482 il[k] = ui[k] + 1; 2483 } 2484 uj_ptr[k] = current_space->array; 2485 uj_lvl_ptr[k] = current_space_lvl->array; 2486 2487 current_space->array += nzk; 2488 current_space->local_used += nzk; 2489 current_space->local_remaining -= nzk; 2490 2491 current_space_lvl->array += nzk; 2492 current_space_lvl->local_used += nzk; 2493 current_space_lvl->local_remaining -= nzk; 2494 2495 ui[k + 1] = ui[k] + nzk; 2496 } 2497 2498 PetscCall(ISRestoreIndices(perm, &rip)); 2499 PetscCall(ISRestoreIndices(iperm, &riip)); 2500 PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 2501 PetscCall(PetscFree(ajtmp)); 2502 2503 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2504 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2505 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 2506 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2507 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2508 2509 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2510 2511 /* put together the new matrix in MATSEQSBAIJ format */ 2512 b = (Mat_SeqSBAIJ *)(fact)->data; 2513 b->singlemalloc = PETSC_FALSE; 2514 2515 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 2516 2517 b->j = uj; 2518 b->i = ui; 2519 b->diag = udiag; 2520 b->free_diag = PETSC_TRUE; 2521 b->ilen = NULL; 2522 b->imax = NULL; 2523 b->row = perm; 2524 b->col = perm; 2525 PetscCall(PetscObjectReference((PetscObject)perm)); 2526 PetscCall(PetscObjectReference((PetscObject)perm)); 2527 b->icol = iperm; 2528 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2529 2530 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2531 2532 b->maxnz = b->nz = ui[am]; 2533 b->free_a = PETSC_TRUE; 2534 b->free_ij = PETSC_TRUE; 2535 2536 fact->info.factor_mallocs = reallocs; 2537 fact->info.fill_ratio_given = fill; 2538 if (ai[am] != 0) { 2539 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2540 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 2541 } else { 2542 fact->info.fill_ratio_needed = 0.0; 2543 } 2544 #if defined(PETSC_USE_INFO) 2545 if (ai[am] != 0) { 2546 PetscReal af = fact->info.fill_ratio_needed; 2547 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2548 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2549 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2550 } else { 2551 PetscCall(PetscInfo(A, "Empty matrix\n")); 2552 } 2553 #endif 2554 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2555 PetscFunctionReturn(PETSC_SUCCESS); 2556 } 2557 2558 #if 0 2559 // unused 2560 static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2561 { 2562 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2563 Mat_SeqSBAIJ *b; 2564 PetscBool perm_identity, missing; 2565 PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 2566 const PetscInt *rip, *riip; 2567 PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 2568 PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 2569 PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2570 PetscReal fill = info->fill, levels = info->levels; 2571 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2572 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 2573 PetscBT lnkbt; 2574 IS iperm; 2575 2576 PetscFunctionBegin; 2577 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2578 PetscCall(MatMissingDiagonal(A, &missing, &d)); 2579 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 2580 PetscCall(ISIdentity(perm, &perm_identity)); 2581 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2582 2583 PetscCall(PetscMalloc1(am + 1, &ui)); 2584 PetscCall(PetscMalloc1(am + 1, &udiag)); 2585 ui[0] = 0; 2586 2587 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2588 if (!levels && perm_identity) { 2589 for (i = 0; i < am; i++) { 2590 ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i]; 2591 udiag[i] = ui[i]; 2592 } 2593 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2594 cols = uj; 2595 for (i = 0; i < am; i++) { 2596 aj = a->j + a->diag[i]; 2597 ncols = ui[i + 1] - ui[i]; 2598 for (j = 0; j < ncols; j++) *cols++ = *aj++; 2599 } 2600 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2601 PetscCall(ISGetIndices(iperm, &riip)); 2602 PetscCall(ISGetIndices(perm, &rip)); 2603 2604 /* initialization */ 2605 PetscCall(PetscMalloc1(am + 1, &ajtmp)); 2606 2607 /* jl: linked list for storing indices of the pivot rows 2608 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2609 PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 2610 for (i = 0; i < am; i++) { 2611 jl[i] = am; 2612 il[i] = 0; 2613 } 2614 2615 /* create and initialize a linked list for storing column indices of the active row k */ 2616 nlnk = am + 1; 2617 PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 2618 2619 /* initial FreeSpace size is fill*(ai[am]+1) */ 2620 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 2621 current_space = free_space; 2622 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl)); 2623 current_space_lvl = free_space_lvl; 2624 2625 for (k = 0; k < am; k++) { /* for each active row k */ 2626 /* initialize lnk by the column indices of row rip[k] of A */ 2627 nzk = 0; 2628 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2629 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2630 ncols_upper = 0; 2631 for (j = 0; j < ncols; j++) { 2632 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2633 if (riip[i] >= k) { /* only take upper triangular entry */ 2634 ajtmp[ncols_upper] = i; 2635 ncols_upper++; 2636 } 2637 } 2638 PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 2639 nzk += nlnk; 2640 2641 /* update lnk by computing fill-in for each pivot row to be merged in */ 2642 prow = jl[k]; /* 1st pivot row */ 2643 2644 while (prow < k) { 2645 nextprow = jl[prow]; 2646 2647 /* merge prow into k-th row */ 2648 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2649 jmax = ui[prow + 1]; 2650 ncols = jmax - jmin; 2651 i = jmin - ui[prow]; 2652 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2653 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2654 j = *(uj - 1); 2655 PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 2656 nzk += nlnk; 2657 2658 /* update il and jl for prow */ 2659 if (jmin < jmax) { 2660 il[prow] = jmin; 2661 j = *cols; 2662 jl[prow] = jl[j]; 2663 jl[j] = prow; 2664 } 2665 prow = nextprow; 2666 } 2667 2668 /* if free space is not available, make more free space */ 2669 if (current_space->local_remaining < nzk) { 2670 i = am - k + 1; /* num of unfactored rows */ 2671 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2672 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2673 PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 2674 reallocs++; 2675 } 2676 2677 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2678 PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 2679 PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2680 2681 /* add the k-th row into il and jl */ 2682 if (nzk > 1) { 2683 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2684 jl[k] = jl[i]; 2685 jl[i] = k; 2686 il[k] = ui[k] + 1; 2687 } 2688 uj_ptr[k] = current_space->array; 2689 uj_lvl_ptr[k] = current_space_lvl->array; 2690 2691 current_space->array += nzk; 2692 current_space->local_used += nzk; 2693 current_space->local_remaining -= nzk; 2694 2695 current_space_lvl->array += nzk; 2696 current_space_lvl->local_used += nzk; 2697 current_space_lvl->local_remaining -= nzk; 2698 2699 ui[k + 1] = ui[k] + nzk; 2700 } 2701 2702 #if defined(PETSC_USE_INFO) 2703 if (ai[am] != 0) { 2704 PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]); 2705 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2706 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2707 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2708 } else { 2709 PetscCall(PetscInfo(A, "Empty matrix\n")); 2710 } 2711 #endif 2712 2713 PetscCall(ISRestoreIndices(perm, &rip)); 2714 PetscCall(ISRestoreIndices(iperm, &riip)); 2715 PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 2716 PetscCall(PetscFree(ajtmp)); 2717 2718 /* destroy list of free space and other temporary array(s) */ 2719 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2720 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 2721 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2722 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2723 2724 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2725 2726 /* put together the new matrix in MATSEQSBAIJ format */ 2727 2728 b = (Mat_SeqSBAIJ *)fact->data; 2729 b->singlemalloc = PETSC_FALSE; 2730 2731 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 2732 2733 b->j = uj; 2734 b->i = ui; 2735 b->diag = udiag; 2736 b->free_diag = PETSC_TRUE; 2737 b->ilen = NULL; 2738 b->imax = NULL; 2739 b->row = perm; 2740 b->col = perm; 2741 2742 PetscCall(PetscObjectReference((PetscObject)perm)); 2743 PetscCall(PetscObjectReference((PetscObject)perm)); 2744 2745 b->icol = iperm; 2746 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2747 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2748 b->maxnz = b->nz = ui[am]; 2749 b->free_a = PETSC_TRUE; 2750 b->free_ij = PETSC_TRUE; 2751 2752 fact->info.factor_mallocs = reallocs; 2753 fact->info.fill_ratio_given = fill; 2754 if (ai[am] != 0) { 2755 fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]); 2756 } else { 2757 fact->info.fill_ratio_needed = 0.0; 2758 } 2759 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2760 PetscFunctionReturn(PETSC_SUCCESS); 2761 } 2762 #endif 2763 2764 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2765 { 2766 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2767 Mat_SeqSBAIJ *b; 2768 PetscBool perm_identity, missing; 2769 PetscReal fill = info->fill; 2770 const PetscInt *rip, *riip; 2771 PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 2772 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 2773 PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag; 2774 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2775 PetscBT lnkbt; 2776 IS iperm; 2777 2778 PetscFunctionBegin; 2779 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2780 PetscCall(MatMissingDiagonal(A, &missing, &i)); 2781 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 2782 2783 /* check whether perm is the identity mapping */ 2784 PetscCall(ISIdentity(perm, &perm_identity)); 2785 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2786 PetscCall(ISGetIndices(iperm, &riip)); 2787 PetscCall(ISGetIndices(perm, &rip)); 2788 2789 /* initialization */ 2790 PetscCall(PetscMalloc1(am + 1, &ui)); 2791 PetscCall(PetscMalloc1(am + 1, &udiag)); 2792 ui[0] = 0; 2793 2794 /* jl: linked list for storing indices of the pivot rows 2795 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2796 PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 2797 for (i = 0; i < am; i++) { 2798 jl[i] = am; 2799 il[i] = 0; 2800 } 2801 2802 /* create and initialize a linked list for storing column indices of the active row k */ 2803 nlnk = am + 1; 2804 PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 2805 2806 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2807 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 2808 current_space = free_space; 2809 2810 for (k = 0; k < am; k++) { /* for each active row k */ 2811 /* initialize lnk by the column indices of row rip[k] of A */ 2812 nzk = 0; 2813 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2814 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2815 ncols_upper = 0; 2816 for (j = 0; j < ncols; j++) { 2817 i = riip[*(aj + ai[rip[k]] + j)]; 2818 if (i >= k) { /* only take upper triangular entry */ 2819 cols[ncols_upper] = i; 2820 ncols_upper++; 2821 } 2822 } 2823 PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 2824 nzk += nlnk; 2825 2826 /* update lnk by computing fill-in for each pivot row to be merged in */ 2827 prow = jl[k]; /* 1st pivot row */ 2828 2829 while (prow < k) { 2830 nextprow = jl[prow]; 2831 /* merge prow into k-th row */ 2832 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2833 jmax = ui[prow + 1]; 2834 ncols = jmax - jmin; 2835 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2836 PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 2837 nzk += nlnk; 2838 2839 /* update il and jl for prow */ 2840 if (jmin < jmax) { 2841 il[prow] = jmin; 2842 j = *uj_ptr; 2843 jl[prow] = jl[j]; 2844 jl[j] = prow; 2845 } 2846 prow = nextprow; 2847 } 2848 2849 /* if free space is not available, make more free space */ 2850 if (current_space->local_remaining < nzk) { 2851 i = am - k + 1; /* num of unfactored rows */ 2852 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2853 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2854 reallocs++; 2855 } 2856 2857 /* copy data into free space, then initialize lnk */ 2858 PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 2859 2860 /* add the k-th row into il and jl */ 2861 if (nzk > 1) { 2862 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2863 jl[k] = jl[i]; 2864 jl[i] = k; 2865 il[k] = ui[k] + 1; 2866 } 2867 ui_ptr[k] = current_space->array; 2868 2869 current_space->array += nzk; 2870 current_space->local_used += nzk; 2871 current_space->local_remaining -= nzk; 2872 2873 ui[k + 1] = ui[k] + nzk; 2874 } 2875 2876 PetscCall(ISRestoreIndices(perm, &rip)); 2877 PetscCall(ISRestoreIndices(iperm, &riip)); 2878 PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 2879 2880 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2881 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2882 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 2883 PetscCall(PetscLLDestroy(lnk, lnkbt)); 2884 2885 /* put together the new matrix in MATSEQSBAIJ format */ 2886 2887 b = (Mat_SeqSBAIJ *)fact->data; 2888 b->singlemalloc = PETSC_FALSE; 2889 b->free_a = PETSC_TRUE; 2890 b->free_ij = PETSC_TRUE; 2891 2892 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 2893 2894 b->j = uj; 2895 b->i = ui; 2896 b->diag = udiag; 2897 b->free_diag = PETSC_TRUE; 2898 b->ilen = NULL; 2899 b->imax = NULL; 2900 b->row = perm; 2901 b->col = perm; 2902 2903 PetscCall(PetscObjectReference((PetscObject)perm)); 2904 PetscCall(PetscObjectReference((PetscObject)perm)); 2905 2906 b->icol = iperm; 2907 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2908 2909 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2910 2911 b->maxnz = b->nz = ui[am]; 2912 2913 fact->info.factor_mallocs = reallocs; 2914 fact->info.fill_ratio_given = fill; 2915 if (ai[am] != 0) { 2916 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2917 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 2918 } else { 2919 fact->info.fill_ratio_needed = 0.0; 2920 } 2921 #if defined(PETSC_USE_INFO) 2922 if (ai[am] != 0) { 2923 PetscReal af = fact->info.fill_ratio_needed; 2924 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2925 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2926 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2927 } else { 2928 PetscCall(PetscInfo(A, "Empty matrix\n")); 2929 } 2930 #endif 2931 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2932 PetscFunctionReturn(PETSC_SUCCESS); 2933 } 2934 2935 #if 0 2936 // unused 2937 static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2938 { 2939 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2940 Mat_SeqSBAIJ *b; 2941 PetscBool perm_identity, missing; 2942 PetscReal fill = info->fill; 2943 const PetscInt *rip, *riip; 2944 PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 2945 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 2946 PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr; 2947 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2948 PetscBT lnkbt; 2949 IS iperm; 2950 2951 PetscFunctionBegin; 2952 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2953 PetscCall(MatMissingDiagonal(A, &missing, &i)); 2954 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 2955 2956 /* check whether perm is the identity mapping */ 2957 PetscCall(ISIdentity(perm, &perm_identity)); 2958 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2959 PetscCall(ISGetIndices(iperm, &riip)); 2960 PetscCall(ISGetIndices(perm, &rip)); 2961 2962 /* initialization */ 2963 PetscCall(PetscMalloc1(am + 1, &ui)); 2964 ui[0] = 0; 2965 2966 /* jl: linked list for storing indices of the pivot rows 2967 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2968 PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 2969 for (i = 0; i < am; i++) { 2970 jl[i] = am; 2971 il[i] = 0; 2972 } 2973 2974 /* create and initialize a linked list for storing column indices of the active row k */ 2975 nlnk = am + 1; 2976 PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 2977 2978 /* initial FreeSpace size is fill*(ai[am]+1) */ 2979 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 2980 current_space = free_space; 2981 2982 for (k = 0; k < am; k++) { /* for each active row k */ 2983 /* initialize lnk by the column indices of row rip[k] of A */ 2984 nzk = 0; 2985 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2986 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2987 ncols_upper = 0; 2988 for (j = 0; j < ncols; j++) { 2989 i = riip[*(aj + ai[rip[k]] + j)]; 2990 if (i >= k) { /* only take upper triangular entry */ 2991 cols[ncols_upper] = i; 2992 ncols_upper++; 2993 } 2994 } 2995 PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 2996 nzk += nlnk; 2997 2998 /* update lnk by computing fill-in for each pivot row to be merged in */ 2999 prow = jl[k]; /* 1st pivot row */ 3000 3001 while (prow < k) { 3002 nextprow = jl[prow]; 3003 /* merge prow into k-th row */ 3004 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3005 jmax = ui[prow + 1]; 3006 ncols = jmax - jmin; 3007 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3008 PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 3009 nzk += nlnk; 3010 3011 /* update il and jl for prow */ 3012 if (jmin < jmax) { 3013 il[prow] = jmin; 3014 j = *uj_ptr; 3015 jl[prow] = jl[j]; 3016 jl[j] = prow; 3017 } 3018 prow = nextprow; 3019 } 3020 3021 /* if free space is not available, make more free space */ 3022 if (current_space->local_remaining < nzk) { 3023 i = am - k + 1; /* num of unfactored rows */ 3024 i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3025 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 3026 reallocs++; 3027 } 3028 3029 /* copy data into free space, then initialize lnk */ 3030 PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 3031 3032 /* add the k-th row into il and jl */ 3033 if (nzk - 1 > 0) { 3034 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3035 jl[k] = jl[i]; 3036 jl[i] = k; 3037 il[k] = ui[k] + 1; 3038 } 3039 ui_ptr[k] = current_space->array; 3040 3041 current_space->array += nzk; 3042 current_space->local_used += nzk; 3043 current_space->local_remaining -= nzk; 3044 3045 ui[k + 1] = ui[k] + nzk; 3046 } 3047 3048 #if defined(PETSC_USE_INFO) 3049 if (ai[am] != 0) { 3050 PetscReal af = (PetscReal)(ui[am]) / ((PetscReal)ai[am]); 3051 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 3052 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 3053 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 3054 } else { 3055 PetscCall(PetscInfo(A, "Empty matrix\n")); 3056 } 3057 #endif 3058 3059 PetscCall(ISRestoreIndices(perm, &rip)); 3060 PetscCall(ISRestoreIndices(iperm, &riip)); 3061 PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 3062 3063 /* destroy list of free space and other temporary array(s) */ 3064 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 3065 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 3066 PetscCall(PetscLLDestroy(lnk, lnkbt)); 3067 3068 /* put together the new matrix in MATSEQSBAIJ format */ 3069 3070 b = (Mat_SeqSBAIJ *)fact->data; 3071 b->singlemalloc = PETSC_FALSE; 3072 b->free_a = PETSC_TRUE; 3073 b->free_ij = PETSC_TRUE; 3074 3075 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 3076 3077 b->j = uj; 3078 b->i = ui; 3079 b->diag = NULL; 3080 b->ilen = NULL; 3081 b->imax = NULL; 3082 b->row = perm; 3083 b->col = perm; 3084 3085 PetscCall(PetscObjectReference((PetscObject)perm)); 3086 PetscCall(PetscObjectReference((PetscObject)perm)); 3087 3088 b->icol = iperm; 3089 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3090 3091 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 3092 b->maxnz = b->nz = ui[am]; 3093 3094 fact->info.factor_mallocs = reallocs; 3095 fact->info.fill_ratio_given = fill; 3096 if (ai[am] != 0) { 3097 fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]); 3098 } else { 3099 fact->info.fill_ratio_needed = 0.0; 3100 } 3101 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3102 PetscFunctionReturn(PETSC_SUCCESS); 3103 } 3104 #endif 3105 3106 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx) 3107 { 3108 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3109 PetscInt n = A->rmap->n; 3110 const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi; 3111 PetscScalar *x, sum; 3112 const PetscScalar *b; 3113 const MatScalar *aa = a->a, *v; 3114 PetscInt i, nz; 3115 3116 PetscFunctionBegin; 3117 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 3118 3119 PetscCall(VecGetArrayRead(bb, &b)); 3120 PetscCall(VecGetArrayWrite(xx, &x)); 3121 3122 /* forward solve the lower triangular */ 3123 x[0] = b[0]; 3124 v = aa; 3125 vi = aj; 3126 for (i = 1; i < n; i++) { 3127 nz = ai[i + 1] - ai[i]; 3128 sum = b[i]; 3129 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 3130 v += nz; 3131 vi += nz; 3132 x[i] = sum; 3133 } 3134 3135 /* backward solve the upper triangular */ 3136 for (i = n - 1; i >= 0; i--) { 3137 v = aa + adiag[i + 1] + 1; 3138 vi = aj + adiag[i + 1] + 1; 3139 nz = adiag[i] - adiag[i + 1] - 1; 3140 sum = x[i]; 3141 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 3142 x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3143 } 3144 3145 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 3146 PetscCall(VecRestoreArrayRead(bb, &b)); 3147 PetscCall(VecRestoreArrayWrite(xx, &x)); 3148 PetscFunctionReturn(PETSC_SUCCESS); 3149 } 3150 3151 PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx) 3152 { 3153 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3154 IS iscol = a->col, isrow = a->row; 3155 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz; 3156 const PetscInt *rout, *cout, *r, *c; 3157 PetscScalar *x, *tmp, sum; 3158 const PetscScalar *b; 3159 const MatScalar *aa = a->a, *v; 3160 3161 PetscFunctionBegin; 3162 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 3163 3164 PetscCall(VecGetArrayRead(bb, &b)); 3165 PetscCall(VecGetArrayWrite(xx, &x)); 3166 tmp = a->solve_work; 3167 3168 PetscCall(ISGetIndices(isrow, &rout)); 3169 r = rout; 3170 PetscCall(ISGetIndices(iscol, &cout)); 3171 c = cout; 3172 3173 /* forward solve the lower triangular */ 3174 tmp[0] = b[r[0]]; 3175 v = aa; 3176 vi = aj; 3177 for (i = 1; i < n; i++) { 3178 nz = ai[i + 1] - ai[i]; 3179 sum = b[r[i]]; 3180 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3181 tmp[i] = sum; 3182 v += nz; 3183 vi += nz; 3184 } 3185 3186 /* backward solve the upper triangular */ 3187 for (i = n - 1; i >= 0; i--) { 3188 v = aa + adiag[i + 1] + 1; 3189 vi = aj + adiag[i + 1] + 1; 3190 nz = adiag[i] - adiag[i + 1] - 1; 3191 sum = tmp[i]; 3192 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3193 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 3194 } 3195 3196 PetscCall(ISRestoreIndices(isrow, &rout)); 3197 PetscCall(ISRestoreIndices(iscol, &cout)); 3198 PetscCall(VecRestoreArrayRead(bb, &b)); 3199 PetscCall(VecRestoreArrayWrite(xx, &x)); 3200 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 3201 PetscFunctionReturn(PETSC_SUCCESS); 3202 } 3203 3204 #if 0 3205 // unused 3206 /* 3207 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3208 */ 3209 static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact) 3210 { 3211 Mat B = *fact; 3212 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 3213 IS isicol; 3214 const PetscInt *r, *ic; 3215 PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag; 3216 PetscInt *bi, *bj, *bdiag, *bdiag_rev; 3217 PetscInt row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au; 3218 PetscInt nlnk, *lnk; 3219 PetscBT lnkbt; 3220 PetscBool row_identity, icol_identity; 3221 MatScalar *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp; 3222 const PetscInt *ics; 3223 PetscInt j, nz, *pj, *bjtmp, k, ncut, *jtmp; 3224 PetscReal dt = info->dt, shift = info->shiftamount; 3225 PetscInt dtcount = (PetscInt)info->dtcount, nnz_max; 3226 PetscBool missing; 3227 3228 PetscFunctionBegin; 3229 if (dt == PETSC_DEFAULT) dt = 0.005; 3230 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax); 3231 3232 /* ------- symbolic factorization, can be reused ---------*/ 3233 PetscCall(MatMissingDiagonal(A, &missing, &i)); 3234 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 3235 adiag = a->diag; 3236 3237 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 3238 3239 /* bdiag is location of diagonal in factor */ 3240 PetscCall(PetscMalloc1(n + 1, &bdiag)); /* becomes b->diag */ 3241 PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */ 3242 3243 /* allocate row pointers bi */ 3244 PetscCall(PetscMalloc1(2 * n + 2, &bi)); 3245 3246 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3247 if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */ 3248 nnz_max = ai[n] + 2 * n * dtcount + 2; 3249 3250 PetscCall(PetscMalloc1(nnz_max + 1, &bj)); 3251 PetscCall(PetscMalloc1(nnz_max + 1, &ba)); 3252 3253 /* put together the new matrix */ 3254 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 3255 b = (Mat_SeqAIJ *)B->data; 3256 3257 b->free_a = PETSC_TRUE; 3258 b->free_ij = PETSC_TRUE; 3259 b->singlemalloc = PETSC_FALSE; 3260 3261 b->a = ba; 3262 b->j = bj; 3263 b->i = bi; 3264 b->diag = bdiag; 3265 b->ilen = NULL; 3266 b->imax = NULL; 3267 b->row = isrow; 3268 b->col = iscol; 3269 PetscCall(PetscObjectReference((PetscObject)isrow)); 3270 PetscCall(PetscObjectReference((PetscObject)iscol)); 3271 b->icol = isicol; 3272 3273 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 3274 b->maxnz = nnz_max; 3275 3276 B->factortype = MAT_FACTOR_ILUDT; 3277 B->info.factor_mallocs = 0; 3278 B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]); 3279 /* ------- end of symbolic factorization ---------*/ 3280 3281 PetscCall(ISGetIndices(isrow, &r)); 3282 PetscCall(ISGetIndices(isicol, &ic)); 3283 ics = ic; 3284 3285 /* linked list for storing column indices of the active row */ 3286 nlnk = n + 1; 3287 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 3288 3289 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3290 PetscCall(PetscMalloc2(n, &im, n, &jtmp)); 3291 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3292 PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp)); 3293 PetscCall(PetscArrayzero(rtmp, n)); 3294 3295 bi[0] = 0; 3296 bdiag[0] = nnz_max - 1; /* location of diag[0] in factor B */ 3297 bdiag_rev[n] = bdiag[0]; 3298 bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */ 3299 for (i = 0; i < n; i++) { 3300 /* copy initial fill into linked list */ 3301 nzi = ai[r[i] + 1] - ai[r[i]]; 3302 PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 3303 nzi_al = adiag[r[i]] - ai[r[i]]; 3304 nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1; 3305 ajtmp = aj + ai[r[i]]; 3306 PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 3307 3308 /* load in initial (unfactored row) */ 3309 aatmp = a->a + ai[r[i]]; 3310 for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++; 3311 3312 /* add pivot rows into linked list */ 3313 row = lnk[n]; 3314 while (row < i) { 3315 nzi_bl = bi[row + 1] - bi[row] + 1; 3316 bjtmp = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */ 3317 PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im)); 3318 nzi += nlnk; 3319 row = lnk[row]; 3320 } 3321 3322 /* copy data from lnk into jtmp, then initialize lnk */ 3323 PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt)); 3324 3325 /* numerical factorization */ 3326 bjtmp = jtmp; 3327 row = *bjtmp++; /* 1st pivot row */ 3328 while (row < i) { 3329 pc = rtmp + row; 3330 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3331 multiplier = (*pc) * (*pv); 3332 *pc = multiplier; 3333 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3334 pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 3335 pv = ba + bdiag[row + 1] + 1; 3336 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3337 for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3338 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 3339 } 3340 row = *bjtmp++; 3341 } 3342 3343 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3344 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3345 nzi_bl = 0; 3346 j = 0; 3347 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3348 vtmp[j] = rtmp[jtmp[j]]; 3349 rtmp[jtmp[j]] = 0.0; 3350 nzi_bl++; 3351 j++; 3352 } 3353 nzi_bu = nzi - nzi_bl - 1; 3354 while (j < nzi) { 3355 vtmp[j] = rtmp[jtmp[j]]; 3356 rtmp[jtmp[j]] = 0.0; 3357 j++; 3358 } 3359 3360 bjtmp = bj + bi[i]; 3361 batmp = ba + bi[i]; 3362 /* apply level dropping rule to L part */ 3363 ncut = nzi_al + dtcount; 3364 if (ncut < nzi_bl) { 3365 PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp)); 3366 PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp)); 3367 } else { 3368 ncut = nzi_bl; 3369 } 3370 for (j = 0; j < ncut; j++) { 3371 bjtmp[j] = jtmp[j]; 3372 batmp[j] = vtmp[j]; 3373 } 3374 bi[i + 1] = bi[i] + ncut; 3375 nzi = ncut + 1; 3376 3377 /* apply level dropping rule to U part */ 3378 ncut = nzi_au + dtcount; 3379 if (ncut < nzi_bu) { 3380 PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1)); 3381 PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1)); 3382 } else { 3383 ncut = nzi_bu; 3384 } 3385 nzi += ncut; 3386 3387 /* mark bdiagonal */ 3388 bdiag[i + 1] = bdiag[i] - (ncut + 1); 3389 bdiag_rev[n - i - 1] = bdiag[i + 1]; 3390 bi[2 * n - i] = bi[2 * n - i + 1] - (ncut + 1); 3391 bjtmp = bj + bdiag[i]; 3392 batmp = ba + bdiag[i]; 3393 *bjtmp = i; 3394 *batmp = diag_tmp; /* rtmp[i]; */ 3395 if (*batmp == 0.0) *batmp = dt + shift; 3396 *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */ 3397 3398 bjtmp = bj + bdiag[i + 1] + 1; 3399 batmp = ba + bdiag[i + 1] + 1; 3400 for (k = 0; k < ncut; k++) { 3401 bjtmp[k] = jtmp[nzi_bl + 1 + k]; 3402 batmp[k] = vtmp[nzi_bl + 1 + k]; 3403 } 3404 3405 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3406 } /* for (i=0; i<n; i++) */ 3407 PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]); 3408 3409 PetscCall(ISRestoreIndices(isrow, &r)); 3410 PetscCall(ISRestoreIndices(isicol, &ic)); 3411 3412 PetscCall(PetscLLDestroy(lnk, lnkbt)); 3413 PetscCall(PetscFree2(im, jtmp)); 3414 PetscCall(PetscFree2(rtmp, vtmp)); 3415 PetscCall(PetscFree(bdiag_rev)); 3416 3417 PetscCall(PetscLogFlops(B->cmap->n)); 3418 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3419 3420 PetscCall(ISIdentity(isrow, &row_identity)); 3421 PetscCall(ISIdentity(isicol, &icol_identity)); 3422 if (row_identity && icol_identity) { 3423 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3424 } else { 3425 B->ops->solve = MatSolve_SeqAIJ; 3426 } 3427 3428 B->ops->solveadd = NULL; 3429 B->ops->solvetranspose = NULL; 3430 B->ops->solvetransposeadd = NULL; 3431 B->ops->matsolve = NULL; 3432 B->assembled = PETSC_TRUE; 3433 B->preallocated = PETSC_TRUE; 3434 PetscFunctionReturn(PETSC_SUCCESS); 3435 } 3436 3437 /* a wrapper of MatILUDTFactor_SeqAIJ() */ 3438 /* 3439 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3440 */ 3441 3442 static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) 3443 { 3444 PetscFunctionBegin; 3445 PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact)); 3446 PetscFunctionReturn(PETSC_SUCCESS); 3447 } 3448 3449 /* 3450 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3451 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3452 */ 3453 /* 3454 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3455 */ 3456 3457 static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info) 3458 { 3459 Mat C = fact; 3460 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 3461 IS isrow = b->row, isicol = b->icol; 3462 const PetscInt *r, *ic, *ics; 3463 PetscInt i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 3464 PetscInt *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj; 3465 MatScalar *rtmp, *pc, multiplier, *v, *pv, *aa = a->a; 3466 PetscReal dt = info->dt, shift = info->shiftamount; 3467 PetscBool row_identity, col_identity; 3468 3469 PetscFunctionBegin; 3470 PetscCall(ISGetIndices(isrow, &r)); 3471 PetscCall(ISGetIndices(isicol, &ic)); 3472 PetscCall(PetscMalloc1(n + 1, &rtmp)); 3473 ics = ic; 3474 3475 for (i = 0; i < n; i++) { 3476 /* initialize rtmp array */ 3477 nzl = bi[i + 1] - bi[i]; /* num of nozeros in L(i,:) */ 3478 bjtmp = bj + bi[i]; 3479 for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0; 3480 rtmp[i] = 0.0; 3481 nzu = bdiag[i] - bdiag[i + 1]; /* num of nozeros in U(i,:) */ 3482 bjtmp = bj + bdiag[i + 1] + 1; 3483 for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0; 3484 3485 /* load in initial unfactored row of A */ 3486 nz = ai[r[i] + 1] - ai[r[i]]; 3487 ajtmp = aj + ai[r[i]]; 3488 v = aa + ai[r[i]]; 3489 for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j]; 3490 3491 /* numerical factorization */ 3492 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3493 nzl = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */ 3494 k = 0; 3495 while (k < nzl) { 3496 row = *bjtmp++; 3497 pc = rtmp + row; 3498 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3499 multiplier = (*pc) * (*pv); 3500 *pc = multiplier; 3501 if (PetscAbsScalar(multiplier) > dt) { 3502 pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 3503 pv = b->a + bdiag[row + 1] + 1; 3504 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3505 for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3506 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 3507 } 3508 k++; 3509 } 3510 3511 /* finished row so stick it into b->a */ 3512 /* L-part */ 3513 pv = b->a + bi[i]; 3514 pj = bj + bi[i]; 3515 nzl = bi[i + 1] - bi[i]; 3516 for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]]; 3517 3518 /* diagonal: invert diagonal entries for simpler triangular solves */ 3519 if (rtmp[i] == 0.0) rtmp[i] = dt + shift; 3520 b->a[bdiag[i]] = 1.0 / rtmp[i]; 3521 3522 /* U-part */ 3523 pv = b->a + bdiag[i + 1] + 1; 3524 pj = bj + bdiag[i + 1] + 1; 3525 nzu = bdiag[i] - bdiag[i + 1] - 1; 3526 for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]]; 3527 } 3528 3529 PetscCall(PetscFree(rtmp)); 3530 PetscCall(ISRestoreIndices(isicol, &ic)); 3531 PetscCall(ISRestoreIndices(isrow, &r)); 3532 3533 PetscCall(ISIdentity(isrow, &row_identity)); 3534 PetscCall(ISIdentity(isicol, &col_identity)); 3535 if (row_identity && col_identity) { 3536 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3537 } else { 3538 C->ops->solve = MatSolve_SeqAIJ; 3539 } 3540 C->ops->solveadd = NULL; 3541 C->ops->solvetranspose = NULL; 3542 C->ops->solvetransposeadd = NULL; 3543 C->ops->matsolve = NULL; 3544 C->assembled = PETSC_TRUE; 3545 C->preallocated = PETSC_TRUE; 3546 3547 PetscCall(PetscLogFlops(C->cmap->n)); 3548 PetscFunctionReturn(PETSC_SUCCESS); 3549 } 3550 #endif 3551