1b377110cSBarry Smith 2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> 3c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h> 4c6db04a5SJed Brown #include <petscbt.h> 5c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 63b2fbd54SBarry Smith 78fa24674SBarry Smith /* 88fa24674SBarry Smith Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 9312e372aSBarry Smith 10312e372aSBarry Smith This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 118fa24674SBarry Smith */ 129371c9d4SSatish Balay PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol) { 138fa24674SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->data; 14889a8dedSBarry Smith PetscInt i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order; 158fa24674SBarry Smith const PetscInt *ai = a->i, *aj = a->j; 168fa24674SBarry Smith const PetscScalar *aa = a->a; 17ace3abfcSBarry Smith PetscBool *done; 18889a8dedSBarry Smith PetscReal best, past = 0, future; 193a40ed3dSBarry Smith 208fa24674SBarry Smith PetscFunctionBegin; 218fa24674SBarry Smith /* pick initial row */ 228fa24674SBarry Smith best = -1; 238fa24674SBarry Smith for (i = 0; i < n; i++) { 2475567043SBarry Smith future = 0.0; 258fa24674SBarry Smith for (j = ai[i]; j < ai[i + 1]; j++) { 26db4deed7SKarl Rupp if (aj[j] != i) future += PetscAbsScalar(aa[j]); 27db4deed7SKarl Rupp else past = PetscAbsScalar(aa[j]); 288fa24674SBarry Smith } 298fa24674SBarry Smith if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 308fa24674SBarry Smith if (past / future > best) { 318fa24674SBarry Smith best = past / future; 328fa24674SBarry Smith current = i; 338fa24674SBarry Smith } 348fa24674SBarry Smith } 358fa24674SBarry Smith 369566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &done)); 379566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(done, n)); 389566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &order)); 398fa24674SBarry Smith order[0] = current; 408fa24674SBarry Smith for (i = 0; i < n - 1; i++) { 418fa24674SBarry Smith done[current] = PETSC_TRUE; 428fa24674SBarry Smith best = -1; 438fa24674SBarry Smith /* loop over all neighbors of current pivot */ 448fa24674SBarry Smith for (j = ai[current]; j < ai[current + 1]; j++) { 458fa24674SBarry Smith jj = aj[j]; 468fa24674SBarry Smith if (done[jj]) continue; 478fa24674SBarry Smith /* loop over columns of potential next row computing weights for below and above diagonal */ 488fa24674SBarry Smith past = future = 0.0; 498fa24674SBarry Smith for (k = ai[jj]; k < ai[jj + 1]; k++) { 508fa24674SBarry Smith kk = aj[k]; 518fa24674SBarry Smith if (done[kk]) past += PetscAbsScalar(aa[k]); 528fa24674SBarry Smith else if (kk != jj) future += PetscAbsScalar(aa[k]); 538fa24674SBarry Smith } 548fa24674SBarry Smith if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 558fa24674SBarry Smith if (past / future > best) { 568fa24674SBarry Smith best = past / future; 578fa24674SBarry Smith newcurrent = jj; 588fa24674SBarry Smith } 598fa24674SBarry Smith } 608fa24674SBarry Smith if (best == -1) { /* no neighbors to select from so select best of all that remain */ 618fa24674SBarry Smith best = -1; 628fa24674SBarry Smith for (k = 0; k < n; k++) { 638fa24674SBarry Smith if (done[k]) continue; 6475567043SBarry Smith future = 0.0; 6575567043SBarry Smith past = 0.0; 668fa24674SBarry Smith for (j = ai[k]; j < ai[k + 1]; j++) { 678fa24674SBarry Smith kk = aj[j]; 688fa24674SBarry Smith if (done[kk]) past += PetscAbsScalar(aa[j]); 698fa24674SBarry Smith else if (kk != k) future += PetscAbsScalar(aa[j]); 708fa24674SBarry Smith } 718fa24674SBarry Smith if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 728fa24674SBarry Smith if (past / future > best) { 738fa24674SBarry Smith best = past / future; 748fa24674SBarry Smith newcurrent = k; 758fa24674SBarry Smith } 768fa24674SBarry Smith } 778fa24674SBarry Smith } 7808401ef6SPierre Jolivet PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current"); 798fa24674SBarry Smith current = newcurrent; 808fa24674SBarry Smith order[i + 1] = current; 818fa24674SBarry Smith } 829566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow)); 838fa24674SBarry Smith *icol = *irow; 849566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)*irow)); 859566063dSJacob Faibussowitsch PetscCall(PetscFree(done)); 869566063dSJacob Faibussowitsch PetscCall(PetscFree(order)); 87d64ed03dSBarry Smith PetscFunctionReturn(0); 883b2fbd54SBarry Smith } 893b2fbd54SBarry Smith 909371c9d4SSatish Balay static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type) { 914ac6704cSBarry Smith PetscFunctionBegin; 924ac6704cSBarry Smith *type = MATSOLVERPETSC; 934ac6704cSBarry Smith PetscFunctionReturn(0); 944ac6704cSBarry Smith } 954ac6704cSBarry Smith 969371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B) { 97d0f46423SBarry Smith PetscInt n = A->rmap->n; 98b24902e0SBarry Smith 99b24902e0SBarry Smith PetscFunctionBegin; 100891bceaeSHong Zhang #if defined(PETSC_USE_COMPLEX) 101b94d7dedSBarry Smith PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || (ftype != MAT_FACTOR_CHOLESKY && ftype != MAT_FACTOR_ICC), PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY or ICC Factor is not supported"); 102891bceaeSHong Zhang #endif 1039566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 1049566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*B, n, n, n, n)); 105599ef60dSHong Zhang if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 1069566063dSJacob Faibussowitsch PetscCall(MatSetType(*B, MATSEQAIJ)); 1072205254eSKarl Rupp 10835233d99SShri Abhyankar (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 10935233d99SShri Abhyankar (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 1102205254eSKarl Rupp 1119566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(*B, A, A)); 1129566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU])); 1139566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU])); 1149566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT])); 115b24902e0SBarry Smith } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 1169566063dSJacob Faibussowitsch PetscCall(MatSetType(*B, MATSEQSBAIJ)); 1179566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL)); 1182205254eSKarl Rupp 11935233d99SShri Abhyankar (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 12035233d99SShri Abhyankar (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 1219566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY])); 1229566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC])); 123e32f2f54SBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported"); 124d5f3da31SBarry Smith (*B)->factortype = ftype; 12500c67f3bSHong Zhang 1269566063dSJacob Faibussowitsch PetscCall(PetscFree((*B)->solvertype)); 1279566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype)); 128f73b0415SBarry Smith (*B)->canuseordering = PETSC_TRUE; 1299566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc)); 130b24902e0SBarry Smith PetscFunctionReturn(0); 131b24902e0SBarry Smith } 132b24902e0SBarry Smith 1339371c9d4SSatish Balay PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) { 134416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 135289bc588SBarry Smith IS isicol; 1365d0c19d7SBarry Smith const PetscInt *r, *ic; 1375d0c19d7SBarry Smith PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j; 1381393bc97SHong Zhang PetscInt *bi, *bj, *ajtmp; 1391393bc97SHong Zhang PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 1409e046878SBarry Smith PetscReal f; 1414875ba38SHong Zhang PetscInt nlnk, *lnk, k, **bi_ptr; 1420298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 1431393bc97SHong Zhang PetscBT lnkbt; 144ece542f9SHong Zhang PetscBool missing; 145289bc588SBarry Smith 1463a40ed3dSBarry Smith PetscFunctionBegin; 14708401ef6SPierre Jolivet PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 1489566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 14928b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 150ece542f9SHong Zhang 1519566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1529566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 1539566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 154289bc588SBarry Smith 155289bc588SBarry Smith /* get new row pointers */ 1569566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bi)); 1571393bc97SHong Zhang bi[0] = 0; 1581393bc97SHong Zhang 1591393bc97SHong Zhang /* bdiag is location of diagonal in factor */ 1609566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 1611393bc97SHong Zhang bdiag[0] = 0; 1621393bc97SHong Zhang 1631393bc97SHong Zhang /* linked list for storing column indices of the active row */ 1641393bc97SHong Zhang nlnk = n + 1; 1659566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 1661393bc97SHong Zhang 1679566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 1681393bc97SHong Zhang 1691393bc97SHong Zhang /* initial FreeSpace size is f*(ai[n]+1) */ 170d3d32019SBarry Smith f = info->fill; 171aa91b3e7SRichard Tran Mills if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 1729566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1731393bc97SHong Zhang current_space = free_space; 174289bc588SBarry Smith 175289bc588SBarry Smith for (i = 0; i < n; i++) { 1761393bc97SHong Zhang /* copy previous fill into linked list */ 177289bc588SBarry Smith nzi = 0; 1781393bc97SHong Zhang nnz = ai[r[i] + 1] - ai[r[i]]; 1791393bc97SHong Zhang ajtmp = aj + ai[r[i]]; 1809566063dSJacob Faibussowitsch PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 1811393bc97SHong Zhang nzi += nlnk; 1821393bc97SHong Zhang 1831393bc97SHong Zhang /* add pivot rows into linked list */ 1841393bc97SHong Zhang row = lnk[n]; 1851393bc97SHong Zhang while (row < i) { 186d1170560SHong Zhang nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 187d1170560SHong Zhang ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 1889566063dSJacob Faibussowitsch PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 1891393bc97SHong Zhang nzi += nlnk; 1901393bc97SHong Zhang row = lnk[row]; 191289bc588SBarry Smith } 1921393bc97SHong Zhang bi[i + 1] = bi[i] + nzi; 1931393bc97SHong Zhang im[i] = nzi; 1941393bc97SHong Zhang 1951393bc97SHong Zhang /* mark bdiag */ 1961393bc97SHong Zhang nzbd = 0; 1971393bc97SHong Zhang nnz = nzi; 1981393bc97SHong Zhang k = lnk[n]; 1991393bc97SHong Zhang while (nnz-- && k < i) { 2001393bc97SHong Zhang nzbd++; 2011393bc97SHong Zhang k = lnk[k]; 2021393bc97SHong Zhang } 2031393bc97SHong Zhang bdiag[i] = bi[i] + nzbd; 2041393bc97SHong Zhang 2051393bc97SHong Zhang /* if free space is not available, make more free space */ 2061393bc97SHong Zhang if (current_space->local_remaining < nzi) { 207f91af8c7SBarry Smith nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */ 2089566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 2091393bc97SHong Zhang reallocs++; 2101393bc97SHong Zhang } 2111393bc97SHong Zhang 2121393bc97SHong Zhang /* copy data into free space, then initialize lnk */ 2139566063dSJacob Faibussowitsch PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 2142205254eSKarl Rupp 2151393bc97SHong Zhang bi_ptr[i] = current_space->array; 2161393bc97SHong Zhang current_space->array += nzi; 2171393bc97SHong Zhang current_space->local_used += nzi; 2181393bc97SHong Zhang current_space->local_remaining -= nzi; 219289bc588SBarry Smith } 2206cf91177SBarry Smith #if defined(PETSC_USE_INFO) 221464e8d44SSatish Balay if (ai[n] != 0) { 2221393bc97SHong Zhang PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 2239566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 2249566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2259566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 2269566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 22705bf559cSSatish Balay } else { 2289566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 22905bf559cSSatish Balay } 23063ba0a88SBarry Smith #endif 2312fb3ed76SBarry Smith 2329566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 2339566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 2341987afe7SBarry Smith 2351393bc97SHong Zhang /* destroy list of free space and other temporary array(s) */ 2369566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 2379566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 2389566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 2399566063dSJacob Faibussowitsch PetscCall(PetscFree2(bi_ptr, im)); 240289bc588SBarry Smith 241289bc588SBarry Smith /* put together the new matrix */ 2429566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 2439566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)isicol)); 244719d5645SBarry Smith b = (Mat_SeqAIJ *)(B)->data; 2452205254eSKarl Rupp 246e6b907acSBarry Smith b->free_a = PETSC_TRUE; 247e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 2487c922b88SBarry Smith b->singlemalloc = PETSC_FALSE; 2492205254eSKarl Rupp 2509566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[n] + 1, &b->a)); 2511393bc97SHong Zhang b->j = bj; 2521393bc97SHong Zhang b->i = bi; 2531393bc97SHong Zhang b->diag = bdiag; 254f4259b30SLisandro Dalcin b->ilen = NULL; 255f4259b30SLisandro Dalcin b->imax = NULL; 256416022c9SBarry Smith b->row = isrow; 257416022c9SBarry Smith b->col = iscol; 2589566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 2599566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 26082bf6240SBarry Smith b->icol = isicol; 2619566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 2621393bc97SHong Zhang 2631393bc97SHong Zhang /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 2649566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)B, (bi[n] - n) * (sizeof(PetscInt) + sizeof(PetscScalar)))); 2651393bc97SHong Zhang b->maxnz = b->nz = bi[n]; 2667fd98bd6SLois Curfman McInnes 267d5f3da31SBarry Smith (B)->factortype = MAT_FACTOR_LU; 268719d5645SBarry Smith (B)->info.factor_mallocs = reallocs; 269719d5645SBarry Smith (B)->info.fill_ratio_given = f; 270703deb49SSatish Balay 2719f7d0b68SHong Zhang if (ai[n]) { 272719d5645SBarry Smith (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 273eea2913aSSatish Balay } else { 274719d5645SBarry Smith (B)->info.fill_ratio_needed = 0.0; 275eea2913aSSatish Balay } 276ad04f41aSHong Zhang (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2779371c9d4SSatish Balay if (a->inode.size) { (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; } 2783a40ed3dSBarry Smith PetscFunctionReturn(0); 279289bc588SBarry Smith } 2801393bc97SHong Zhang 2819371c9d4SSatish Balay PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) { 282ce3d78c0SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 283ce3d78c0SShri Abhyankar IS isicol; 2848758e1faSBarry Smith const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp; 2858758e1faSBarry Smith PetscInt i, n = A->rmap->n; 2868758e1faSBarry Smith PetscInt *bi, *bj; 287ce3d78c0SShri Abhyankar PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 288ce3d78c0SShri Abhyankar PetscReal f; 289ce3d78c0SShri Abhyankar PetscInt nlnk, *lnk, k, **bi_ptr; 2900298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 291ce3d78c0SShri Abhyankar PetscBT lnkbt; 292ece542f9SHong Zhang PetscBool missing; 293ce3d78c0SShri Abhyankar 294ce3d78c0SShri Abhyankar PetscFunctionBegin; 29508401ef6SPierre Jolivet PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 2969566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 29728b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 298ece542f9SHong Zhang 2999566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 3009566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 3019566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 302ce3d78c0SShri Abhyankar 3031bfa06eaSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 3049566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bi)); 3059566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 306a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 3079b48462bSShri Abhyankar 3089b48462bSShri Abhyankar /* linked list for storing column indices of the active row */ 3099b48462bSShri Abhyankar nlnk = n + 1; 3109566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 3119b48462bSShri Abhyankar 3129566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 3139b48462bSShri Abhyankar 3149b48462bSShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 3159b48462bSShri Abhyankar f = info->fill; 316aa91b3e7SRichard Tran Mills if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 3179566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 3189b48462bSShri Abhyankar current_space = free_space; 3199b48462bSShri Abhyankar 3209b48462bSShri Abhyankar for (i = 0; i < n; i++) { 3219b48462bSShri Abhyankar /* copy previous fill into linked list */ 3229b48462bSShri Abhyankar nzi = 0; 3239b48462bSShri Abhyankar nnz = ai[r[i] + 1] - ai[r[i]]; 3249b48462bSShri Abhyankar ajtmp = aj + ai[r[i]]; 3259566063dSJacob Faibussowitsch PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 3269b48462bSShri Abhyankar nzi += nlnk; 3279b48462bSShri Abhyankar 3289b48462bSShri Abhyankar /* add pivot rows into linked list */ 3299b48462bSShri Abhyankar row = lnk[n]; 3309b48462bSShri Abhyankar while (row < i) { 3319b48462bSShri Abhyankar nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 3329b48462bSShri Abhyankar ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 3339566063dSJacob Faibussowitsch PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 3349b48462bSShri Abhyankar nzi += nlnk; 3359b48462bSShri Abhyankar row = lnk[row]; 3369b48462bSShri Abhyankar } 3379b48462bSShri Abhyankar bi[i + 1] = bi[i] + nzi; 3389b48462bSShri Abhyankar im[i] = nzi; 3399b48462bSShri Abhyankar 3409b48462bSShri Abhyankar /* mark bdiag */ 3419b48462bSShri Abhyankar nzbd = 0; 3429b48462bSShri Abhyankar nnz = nzi; 3439b48462bSShri Abhyankar k = lnk[n]; 3449b48462bSShri Abhyankar while (nnz-- && k < i) { 3459b48462bSShri Abhyankar nzbd++; 3469b48462bSShri Abhyankar k = lnk[k]; 3479b48462bSShri Abhyankar } 3489b48462bSShri Abhyankar bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 3499b48462bSShri Abhyankar 3509b48462bSShri Abhyankar /* if free space is not available, make more free space */ 3519b48462bSShri Abhyankar if (current_space->local_remaining < nzi) { 3522f18eb33SBarry Smith /* estimated additional space needed */ 3531df4278eSBarry Smith nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi)); 3549566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 3559b48462bSShri Abhyankar reallocs++; 3569b48462bSShri Abhyankar } 3579b48462bSShri Abhyankar 3589b48462bSShri Abhyankar /* copy data into free space, then initialize lnk */ 3599566063dSJacob Faibussowitsch PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 3602205254eSKarl Rupp 3619b48462bSShri Abhyankar bi_ptr[i] = current_space->array; 3629b48462bSShri Abhyankar current_space->array += nzi; 3639b48462bSShri Abhyankar current_space->local_used += nzi; 3649b48462bSShri Abhyankar current_space->local_remaining -= nzi; 3659b48462bSShri Abhyankar } 3669b48462bSShri Abhyankar 3679566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 3689566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 3699b48462bSShri Abhyankar 3709263d837SHong Zhang /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 3719566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 3729566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 3739566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 3749566063dSJacob Faibussowitsch PetscCall(PetscFree2(bi_ptr, im)); 3759b48462bSShri Abhyankar 3769b48462bSShri Abhyankar /* put together the new matrix */ 3779566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 3789566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)isicol)); 3799b48462bSShri Abhyankar b = (Mat_SeqAIJ *)(B)->data; 3802205254eSKarl Rupp 3819b48462bSShri Abhyankar b->free_a = PETSC_TRUE; 3829b48462bSShri Abhyankar b->free_ij = PETSC_TRUE; 3839b48462bSShri Abhyankar b->singlemalloc = PETSC_FALSE; 3842205254eSKarl Rupp 3859566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a)); 3862205254eSKarl Rupp 3879b48462bSShri Abhyankar b->j = bj; 3889b48462bSShri Abhyankar b->i = bi; 3899b48462bSShri Abhyankar b->diag = bdiag; 390f4259b30SLisandro Dalcin b->ilen = NULL; 391f4259b30SLisandro Dalcin b->imax = NULL; 3929b48462bSShri Abhyankar b->row = isrow; 3939b48462bSShri Abhyankar b->col = iscol; 3949566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 3959566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 3969b48462bSShri Abhyankar b->icol = isicol; 3979566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 3989b48462bSShri Abhyankar 3999b48462bSShri Abhyankar /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 4009566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)B, (bdiag[0] + 1) * (sizeof(PetscInt) + sizeof(PetscScalar)))); 4019b48462bSShri Abhyankar b->maxnz = b->nz = bdiag[0] + 1; 4022205254eSKarl Rupp 403d5f3da31SBarry Smith B->factortype = MAT_FACTOR_LU; 404f284f12aSHong Zhang B->info.factor_mallocs = reallocs; 405f284f12aSHong Zhang B->info.fill_ratio_given = f; 4069b48462bSShri Abhyankar 4079b48462bSShri Abhyankar if (ai[n]) { 408f284f12aSHong Zhang B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 4099b48462bSShri Abhyankar } else { 410f284f12aSHong Zhang B->info.fill_ratio_needed = 0.0; 4119b48462bSShri Abhyankar } 4129263d837SHong Zhang #if defined(PETSC_USE_INFO) 4139263d837SHong Zhang if (ai[n] != 0) { 4149263d837SHong Zhang PetscReal af = B->info.fill_ratio_needed; 4159566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 4169566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 4179566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 4189566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 4199263d837SHong Zhang } else { 4209566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 4219263d837SHong Zhang } 4229263d837SHong Zhang #endif 42335233d99SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 4249371c9d4SSatish Balay if (a->inode.size) { B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; } 4259566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCheckInode_FactorLU(B)); 4269b48462bSShri Abhyankar PetscFunctionReturn(0); 4279b48462bSShri Abhyankar } 4289b48462bSShri Abhyankar 4295b5bc046SBarry Smith /* 4305b5bc046SBarry Smith Trouble in factorization, should we dump the original matrix? 4315b5bc046SBarry Smith */ 4329371c9d4SSatish Balay PetscErrorCode MatFactorDumpMatrix(Mat A) { 433ace3abfcSBarry Smith PetscBool flg = PETSC_FALSE; 4345b5bc046SBarry Smith 4355b5bc046SBarry Smith PetscFunctionBegin; 4369566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL)); 4375b5bc046SBarry Smith if (flg) { 4385b5bc046SBarry Smith PetscViewer viewer; 4395b5bc046SBarry Smith char filename[PETSC_MAX_PATH_LEN]; 4405b5bc046SBarry Smith 4419566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank)); 4429566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer)); 4439566063dSJacob Faibussowitsch PetscCall(MatView(A, viewer)); 4449566063dSJacob Faibussowitsch PetscCall(PetscViewerDestroy(&viewer)); 4455b5bc046SBarry Smith } 4465b5bc046SBarry Smith PetscFunctionReturn(0); 4475b5bc046SBarry Smith } 4485b5bc046SBarry Smith 4499371c9d4SSatish Balay PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) { 450c9c72f8fSHong Zhang Mat C = B; 451c9c72f8fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 452c9c72f8fSHong Zhang IS isrow = b->row, isicol = b->icol; 453c9c72f8fSHong Zhang const PetscInt *r, *ic, *ics; 454bbd65245SShri Abhyankar const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag; 455bbd65245SShri Abhyankar PetscInt i, j, k, nz, nzL, row, *pj; 456bbd65245SShri Abhyankar const PetscInt *ajtmp, *bjtmp; 457bbd65245SShri Abhyankar MatScalar *rtmp, *pc, multiplier, *pv; 458bbd65245SShri Abhyankar const MatScalar *aa = a->a, *v; 459ace3abfcSBarry Smith PetscBool row_identity, col_identity; 460d90ac83dSHong Zhang FactorShiftCtx sctx; 4614f81c4b7SBarry Smith const PetscInt *ddiag; 462d4ad06f7SHong Zhang PetscReal rs; 463d4ad06f7SHong Zhang MatScalar d; 464d4ad06f7SHong Zhang 465c9c72f8fSHong Zhang PetscFunctionBegin; 466d6e5152cSHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 4679566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 468d4ad06f7SHong Zhang 469f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 470d4ad06f7SHong Zhang ddiag = a->diag; 471d4ad06f7SHong Zhang sctx.shift_top = info->zeropivot; 472d4ad06f7SHong Zhang for (i = 0; i < n; i++) { 473d4ad06f7SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 474d4ad06f7SHong Zhang d = (aa)[ddiag[i]]; 475d4ad06f7SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 476d4ad06f7SHong Zhang v = aa + ai[i]; 477d4ad06f7SHong Zhang nz = ai[i + 1] - ai[i]; 4782205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 479d4ad06f7SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 480d4ad06f7SHong Zhang } 481d4ad06f7SHong Zhang sctx.shift_top *= 1.1; 482d4ad06f7SHong Zhang sctx.nshift_max = 5; 483d4ad06f7SHong Zhang sctx.shift_lo = 0.; 484d4ad06f7SHong Zhang sctx.shift_hi = 1.; 485d4ad06f7SHong Zhang } 486d4ad06f7SHong Zhang 4879566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 4889566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 4899566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp)); 490c9c72f8fSHong Zhang ics = ic; 491c9c72f8fSHong Zhang 492d4ad06f7SHong Zhang do { 49307b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 494c9c72f8fSHong Zhang for (i = 0; i < n; i++) { 495c9c72f8fSHong Zhang /* zero rtmp */ 496c9c72f8fSHong Zhang /* L part */ 497c9c72f8fSHong Zhang nz = bi[i + 1] - bi[i]; 498c9c72f8fSHong Zhang bjtmp = bj + bi[i]; 499c9c72f8fSHong Zhang for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 500c9c72f8fSHong Zhang 501c9c72f8fSHong Zhang /* U part */ 50262fbd6afSShri Abhyankar nz = bdiag[i] - bdiag[i + 1]; 50362fbd6afSShri Abhyankar bjtmp = bj + bdiag[i + 1] + 1; 504813a1f2bSShri Abhyankar for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 505813a1f2bSShri Abhyankar 506813a1f2bSShri Abhyankar /* load in initial (unfactored row) */ 507813a1f2bSShri Abhyankar nz = ai[r[i] + 1] - ai[r[i]]; 508813a1f2bSShri Abhyankar ajtmp = aj + ai[r[i]]; 509813a1f2bSShri Abhyankar v = aa + ai[r[i]]; 5109371c9d4SSatish Balay for (j = 0; j < nz; j++) { rtmp[ics[ajtmp[j]]] = v[j]; } 511813a1f2bSShri Abhyankar /* ZeropivotApply() */ 51298a93161SHong Zhang rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 513813a1f2bSShri Abhyankar 514813a1f2bSShri Abhyankar /* elimination */ 515813a1f2bSShri Abhyankar bjtmp = bj + bi[i]; 516813a1f2bSShri Abhyankar row = *bjtmp++; 517813a1f2bSShri Abhyankar nzL = bi[i + 1] - bi[i]; 518f268cba8SShri Abhyankar for (k = 0; k < nzL; k++) { 519813a1f2bSShri Abhyankar pc = rtmp + row; 520813a1f2bSShri Abhyankar if (*pc != 0.0) { 521813a1f2bSShri Abhyankar pv = b->a + bdiag[row]; 522813a1f2bSShri Abhyankar multiplier = *pc * (*pv); 523813a1f2bSShri Abhyankar *pc = multiplier; 5242205254eSKarl Rupp 52562fbd6afSShri Abhyankar pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 52662fbd6afSShri Abhyankar pv = b->a + bdiag[row + 1] + 1; 52762fbd6afSShri Abhyankar nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */ 5282205254eSKarl Rupp 529813a1f2bSShri Abhyankar for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 5309566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 531813a1f2bSShri Abhyankar } 532f268cba8SShri Abhyankar row = *bjtmp++; 533813a1f2bSShri Abhyankar } 534813a1f2bSShri Abhyankar 535813a1f2bSShri Abhyankar /* finished row so stick it into b->a */ 536813a1f2bSShri Abhyankar rs = 0.0; 537813a1f2bSShri Abhyankar /* L part */ 538813a1f2bSShri Abhyankar pv = b->a + bi[i]; 539813a1f2bSShri Abhyankar pj = b->j + bi[i]; 540813a1f2bSShri Abhyankar nz = bi[i + 1] - bi[i]; 541813a1f2bSShri Abhyankar for (j = 0; j < nz; j++) { 5429371c9d4SSatish Balay pv[j] = rtmp[pj[j]]; 5439371c9d4SSatish Balay rs += PetscAbsScalar(pv[j]); 544813a1f2bSShri Abhyankar } 545813a1f2bSShri Abhyankar 546813a1f2bSShri Abhyankar /* U part */ 54762fbd6afSShri Abhyankar pv = b->a + bdiag[i + 1] + 1; 54862fbd6afSShri Abhyankar pj = b->j + bdiag[i + 1] + 1; 54962fbd6afSShri Abhyankar nz = bdiag[i] - bdiag[i + 1] - 1; 550813a1f2bSShri Abhyankar for (j = 0; j < nz; j++) { 5519371c9d4SSatish Balay pv[j] = rtmp[pj[j]]; 5529371c9d4SSatish Balay rs += PetscAbsScalar(pv[j]); 553813a1f2bSShri Abhyankar } 554813a1f2bSShri Abhyankar 555813a1f2bSShri Abhyankar sctx.rs = rs; 556813a1f2bSShri Abhyankar sctx.pv = rtmp[i]; 5579566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 55807b50cabSHong Zhang if (sctx.newshift) break; /* break for-loop */ 55907b50cabSHong Zhang rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 560813a1f2bSShri Abhyankar 561a5b23f4aSJose E. Roman /* Mark diagonal and invert diagonal for simpler triangular solves */ 562813a1f2bSShri Abhyankar pv = b->a + bdiag[i]; 563813a1f2bSShri Abhyankar *pv = 1.0 / rtmp[i]; 564813a1f2bSShri Abhyankar 565813a1f2bSShri Abhyankar } /* endof for (i=0; i<n; i++) { */ 566813a1f2bSShri Abhyankar 5678ff23777SHong Zhang /* MatPivotRefine() */ 56807b50cabSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 569813a1f2bSShri Abhyankar /* 570813a1f2bSShri Abhyankar * if no shift in this attempt & shifting & started shifting & can refine, 571813a1f2bSShri Abhyankar * then try lower shift 572813a1f2bSShri Abhyankar */ 573813a1f2bSShri Abhyankar sctx.shift_hi = sctx.shift_fraction; 574813a1f2bSShri Abhyankar sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 575813a1f2bSShri Abhyankar sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 57607b50cabSHong Zhang sctx.newshift = PETSC_TRUE; 577813a1f2bSShri Abhyankar sctx.nshift++; 578813a1f2bSShri Abhyankar } 57907b50cabSHong Zhang } while (sctx.newshift); 580813a1f2bSShri Abhyankar 5819566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 5829566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 5839566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 584d3ac4fa3SBarry Smith 5859566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 5869566063dSJacob Faibussowitsch PetscCall(ISIdentity(isicol, &col_identity)); 587abb87a52SBarry Smith if (b->inode.size) { 588abb87a52SBarry Smith C->ops->solve = MatSolve_SeqAIJ_Inode; 589abb87a52SBarry Smith } else if (row_identity && col_identity) { 59035233d99SShri Abhyankar C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 591813a1f2bSShri Abhyankar } else { 59235233d99SShri Abhyankar C->ops->solve = MatSolve_SeqAIJ; 593813a1f2bSShri Abhyankar } 59435233d99SShri Abhyankar C->ops->solveadd = MatSolveAdd_SeqAIJ; 59535233d99SShri Abhyankar C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 59635233d99SShri Abhyankar C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 59735233d99SShri Abhyankar C->ops->matsolve = MatMatSolve_SeqAIJ; 598813a1f2bSShri Abhyankar C->assembled = PETSC_TRUE; 599813a1f2bSShri Abhyankar C->preallocated = PETSC_TRUE; 6002205254eSKarl Rupp 6019566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->cmap->n)); 602813a1f2bSShri Abhyankar 60314d2772eSHong Zhang /* MatShiftView(A,info,&sctx) */ 604813a1f2bSShri Abhyankar if (sctx.nshift) { 605f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 6069566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 607f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 6089566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 609f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 6109566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 611813a1f2bSShri Abhyankar } 612813a1f2bSShri Abhyankar } 613813a1f2bSShri Abhyankar PetscFunctionReturn(0); 614813a1f2bSShri Abhyankar } 615813a1f2bSShri Abhyankar 6169371c9d4SSatish Balay PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) { 617719d5645SBarry Smith Mat C = B; 618416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 61982bf6240SBarry Smith IS isrow = b->row, isicol = b->icol; 6205d0c19d7SBarry Smith const PetscInt *r, *ic, *ics; 621d278edc7SBarry Smith PetscInt nz, row, i, j, n = A->rmap->n, diag; 622d278edc7SBarry Smith const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 623d278edc7SBarry Smith const PetscInt *ajtmp, *bjtmp, *diag_offset = b->diag, *pj; 624d278edc7SBarry Smith MatScalar *pv, *rtmp, *pc, multiplier, d; 625d278edc7SBarry Smith const MatScalar *v, *aa = a->a; 62633f9893dSHong Zhang PetscReal rs = 0.0; 627d90ac83dSHong Zhang FactorShiftCtx sctx; 6284f81c4b7SBarry Smith const PetscInt *ddiag; 629ace3abfcSBarry Smith PetscBool row_identity, col_identity; 630289bc588SBarry Smith 6313a40ed3dSBarry Smith PetscFunctionBegin; 632504a3fadSHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 6339566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 634ada7143aSSatish Balay 635f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 63642898933SHong Zhang ddiag = a->diag; 6379f7d0b68SHong Zhang sctx.shift_top = info->zeropivot; 6386cc28720Svictorle for (i = 0; i < n; i++) { 6399f95998fSHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 6409f7d0b68SHong Zhang d = (aa)[ddiag[i]]; 6411a890ab1SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 6429f7d0b68SHong Zhang v = aa + ai[i]; 6439f7d0b68SHong Zhang nz = ai[i + 1] - ai[i]; 6442205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 6451a890ab1SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 6466cc28720Svictorle } 647118f5789SBarry Smith sctx.shift_top *= 1.1; 648d2276718SHong Zhang sctx.nshift_max = 5; 649d2276718SHong Zhang sctx.shift_lo = 0.; 650d2276718SHong Zhang sctx.shift_hi = 1.; 651a0a356efSHong Zhang } 652a0a356efSHong Zhang 6539566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 6549566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 6559566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp)); 656504a3fadSHong Zhang ics = ic; 657504a3fadSHong Zhang 658085256b3SBarry Smith do { 65907b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 660289bc588SBarry Smith for (i = 0; i < n; i++) { 661b3bf805bSHong Zhang nz = bi[i + 1] - bi[i]; 662b3bf805bSHong Zhang bjtmp = bj + bi[i]; 6639f7d0b68SHong Zhang for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 664289bc588SBarry Smith 665289bc588SBarry Smith /* load in initial (unfactored row) */ 6669f7d0b68SHong Zhang nz = ai[r[i] + 1] - ai[r[i]]; 6679f7d0b68SHong Zhang ajtmp = aj + ai[r[i]]; 6689f7d0b68SHong Zhang v = aa + ai[r[i]]; 6699371c9d4SSatish Balay for (j = 0; j < nz; j++) { rtmp[ics[ajtmp[j]]] = v[j]; } 670d2276718SHong Zhang rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 671289bc588SBarry Smith 672b3bf805bSHong Zhang row = *bjtmp++; 673f2582111SSatish Balay while (row < i) { 6748c37ef55SBarry Smith pc = rtmp + row; 6758c37ef55SBarry Smith if (*pc != 0.0) { 676010a20caSHong Zhang pv = b->a + diag_offset[row]; 677010a20caSHong Zhang pj = b->j + diag_offset[row] + 1; 67835aab85fSBarry Smith multiplier = *pc / *pv++; 6798c37ef55SBarry Smith *pc = multiplier; 680b3bf805bSHong Zhang nz = bi[row + 1] - diag_offset[row] - 1; 6819f7d0b68SHong Zhang for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 6829566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 683289bc588SBarry Smith } 684b3bf805bSHong Zhang row = *bjtmp++; 685289bc588SBarry Smith } 686416022c9SBarry Smith /* finished row so stick it into b->a */ 687b3bf805bSHong Zhang pv = b->a + bi[i]; 688b3bf805bSHong Zhang pj = b->j + bi[i]; 689b3bf805bSHong Zhang nz = bi[i + 1] - bi[i]; 690b3bf805bSHong Zhang diag = diag_offset[i] - bi[i]; 691e57ff13aSBarry Smith rs = 0.0; 692d3d32019SBarry Smith for (j = 0; j < nz; j++) { 6939f7d0b68SHong Zhang pv[j] = rtmp[pj[j]]; 6949f7d0b68SHong Zhang rs += PetscAbsScalar(pv[j]); 695d3d32019SBarry Smith } 696e57ff13aSBarry Smith rs -= PetscAbsScalar(pv[diag]); 697d2276718SHong Zhang 698d2276718SHong Zhang sctx.rs = rs; 699d2276718SHong Zhang sctx.pv = pv[diag]; 7009566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 70107b50cabSHong Zhang if (sctx.newshift) break; 702504a3fadSHong Zhang pv[diag] = sctx.pv; 70335aab85fSBarry Smith } 704d2276718SHong Zhang 70507b50cabSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 7066cc28720Svictorle /* 7076c037d1bSvictorle * if no shift in this attempt & shifting & started shifting & can refine, 7086cc28720Svictorle * then try lower shift 7096cc28720Svictorle */ 7100481f469SBarry Smith sctx.shift_hi = sctx.shift_fraction; 7110481f469SBarry Smith sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 7120481f469SBarry Smith sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 71307b50cabSHong Zhang sctx.newshift = PETSC_TRUE; 714d2276718SHong Zhang sctx.nshift++; 7156cc28720Svictorle } 71607b50cabSHong Zhang } while (sctx.newshift); 7170f11f9aeSSatish Balay 718a5b23f4aSJose E. Roman /* invert diagonal entries for simpler triangular solves */ 7199371c9d4SSatish Balay for (i = 0; i < n; i++) { b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]]; } 7209566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 7219566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 7229566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 723d3ac4fa3SBarry Smith 7249566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 7259566063dSJacob Faibussowitsch PetscCall(ISIdentity(isicol, &col_identity)); 7268d8c7c43SBarry Smith if (row_identity && col_identity) { 727ad04f41aSHong Zhang C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 7288d8c7c43SBarry Smith } else { 729ad04f41aSHong Zhang C->ops->solve = MatSolve_SeqAIJ_inplace; 730db4efbfdSBarry Smith } 731ad04f41aSHong Zhang C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 732ad04f41aSHong Zhang C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 733ad04f41aSHong Zhang C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 734ad04f41aSHong Zhang C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 7352205254eSKarl Rupp 736c456f294SBarry Smith C->assembled = PETSC_TRUE; 7375c9eb25fSBarry Smith C->preallocated = PETSC_TRUE; 7382205254eSKarl Rupp 7399566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->cmap->n)); 740d2276718SHong Zhang if (sctx.nshift) { 741f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 7429566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 743f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 7449566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 7456cc28720Svictorle } 7460697b6caSHong Zhang } 747d3ac4fa3SBarry Smith (C)->ops->solve = MatSolve_SeqAIJ_inplace; 748d3ac4fa3SBarry Smith (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 7492205254eSKarl Rupp 7509566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCheckInode(C)); 7513a40ed3dSBarry Smith PetscFunctionReturn(0); 752289bc588SBarry Smith } 7530889b5dcSvictorle 754137fb511SHong Zhang /* 755137fb511SHong Zhang This routine implements inplace ILU(0) with row or/and column permutations. 756137fb511SHong Zhang Input: 757137fb511SHong Zhang A - original matrix 758137fb511SHong Zhang Output; 759137fb511SHong Zhang A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 760137fb511SHong Zhang a->j (col index) is permuted by the inverse of colperm, then sorted 761137fb511SHong Zhang a->a reordered accordingly with a->j 762137fb511SHong Zhang a->diag (ptr to diagonal elements) is updated. 763137fb511SHong Zhang */ 7649371c9d4SSatish Balay PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info) { 765b051339dSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 766b051339dSHong Zhang IS isrow = a->row, isicol = a->icol; 7675d0c19d7SBarry Smith const PetscInt *r, *ic, *ics; 7685d0c19d7SBarry Smith PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j; 7695d0c19d7SBarry Smith PetscInt *ajtmp, nz, row; 770b051339dSHong Zhang PetscInt *diag = a->diag, nbdiag, *pj; 771a77337e4SBarry Smith PetscScalar *rtmp, *pc, multiplier, d; 772504a3fadSHong Zhang MatScalar *pv, *v; 773137fb511SHong Zhang PetscReal rs; 774d90ac83dSHong Zhang FactorShiftCtx sctx; 775504a3fadSHong Zhang const MatScalar *aa = a->a, *vtmp; 776137fb511SHong Zhang 777137fb511SHong Zhang PetscFunctionBegin; 77808401ef6SPierre Jolivet PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address"); 779504a3fadSHong Zhang 780504a3fadSHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 7819566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 782504a3fadSHong Zhang 783504a3fadSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 784504a3fadSHong Zhang const PetscInt *ddiag = a->diag; 785504a3fadSHong Zhang sctx.shift_top = info->zeropivot; 786504a3fadSHong Zhang for (i = 0; i < n; i++) { 787504a3fadSHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 788504a3fadSHong Zhang d = (aa)[ddiag[i]]; 789504a3fadSHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 790504a3fadSHong Zhang vtmp = aa + ai[i]; 791504a3fadSHong Zhang nz = ai[i + 1] - ai[i]; 7922205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]); 793504a3fadSHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 794504a3fadSHong Zhang } 795504a3fadSHong Zhang sctx.shift_top *= 1.1; 796504a3fadSHong Zhang sctx.nshift_max = 5; 797504a3fadSHong Zhang sctx.shift_lo = 0.; 798504a3fadSHong Zhang sctx.shift_hi = 1.; 799504a3fadSHong Zhang } 800504a3fadSHong Zhang 8019566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 8029566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 8039566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp)); 8049566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(rtmp, n + 1)); 805b051339dSHong Zhang ics = ic; 806137fb511SHong Zhang 807504a3fadSHong Zhang #if defined(MV) 80875567043SBarry Smith sctx.shift_top = 0.; 809e60cf9a0SBarry Smith sctx.nshift_max = 0; 81075567043SBarry Smith sctx.shift_lo = 0.; 81175567043SBarry Smith sctx.shift_hi = 0.; 81275567043SBarry Smith sctx.shift_fraction = 0.; 813137fb511SHong Zhang 814f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 81575567043SBarry Smith sctx.shift_top = 0.; 816137fb511SHong Zhang for (i = 0; i < n; i++) { 817137fb511SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 818b051339dSHong Zhang d = (a->a)[diag[i]]; 819137fb511SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 820b051339dSHong Zhang v = a->a + ai[i]; 821b051339dSHong Zhang nz = ai[i + 1] - ai[i]; 8222205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 823137fb511SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 824137fb511SHong Zhang } 825137fb511SHong Zhang if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 826137fb511SHong Zhang sctx.shift_top *= 1.1; 827137fb511SHong Zhang sctx.nshift_max = 5; 828137fb511SHong Zhang sctx.shift_lo = 0.; 829137fb511SHong Zhang sctx.shift_hi = 1.; 830137fb511SHong Zhang } 831137fb511SHong Zhang 83275567043SBarry Smith sctx.shift_amount = 0.; 833137fb511SHong Zhang sctx.nshift = 0; 834504a3fadSHong Zhang #endif 835504a3fadSHong Zhang 836137fb511SHong Zhang do { 83707b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 838137fb511SHong Zhang for (i = 0; i < n; i++) { 839b051339dSHong Zhang /* load in initial unfactored row */ 840b051339dSHong Zhang nz = ai[r[i] + 1] - ai[r[i]]; 841b051339dSHong Zhang ajtmp = aj + ai[r[i]]; 842b051339dSHong Zhang v = a->a + ai[r[i]]; 843137fb511SHong Zhang /* sort permuted ajtmp and values v accordingly */ 844b051339dSHong Zhang for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]]; 8459566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v)); 846137fb511SHong Zhang 847b051339dSHong Zhang diag[r[i]] = ai[r[i]]; 848137fb511SHong Zhang for (j = 0; j < nz; j++) { 849137fb511SHong Zhang rtmp[ajtmp[j]] = v[j]; 850b051339dSHong Zhang if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 851137fb511SHong Zhang } 852137fb511SHong Zhang rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 853137fb511SHong Zhang 854b051339dSHong Zhang row = *ajtmp++; 855137fb511SHong Zhang while (row < i) { 856137fb511SHong Zhang pc = rtmp + row; 857137fb511SHong Zhang if (*pc != 0.0) { 858b051339dSHong Zhang pv = a->a + diag[r[row]]; 859b051339dSHong Zhang pj = aj + diag[r[row]] + 1; 860137fb511SHong Zhang 861137fb511SHong Zhang multiplier = *pc / *pv++; 862137fb511SHong Zhang *pc = multiplier; 863b051339dSHong Zhang nz = ai[r[row] + 1] - diag[r[row]] - 1; 864b051339dSHong Zhang for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 8659566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 866137fb511SHong Zhang } 867b051339dSHong Zhang row = *ajtmp++; 868137fb511SHong Zhang } 869b051339dSHong Zhang /* finished row so overwrite it onto a->a */ 870b051339dSHong Zhang pv = a->a + ai[r[i]]; 871b051339dSHong Zhang pj = aj + ai[r[i]]; 872b051339dSHong Zhang nz = ai[r[i] + 1] - ai[r[i]]; 873b051339dSHong Zhang nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 874137fb511SHong Zhang 875137fb511SHong Zhang rs = 0.0; 876137fb511SHong Zhang for (j = 0; j < nz; j++) { 877b051339dSHong Zhang pv[j] = rtmp[pj[j]]; 878b051339dSHong Zhang if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 879137fb511SHong Zhang } 880137fb511SHong Zhang 881137fb511SHong Zhang sctx.rs = rs; 882b051339dSHong Zhang sctx.pv = pv[nbdiag]; 8839566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 88407b50cabSHong Zhang if (sctx.newshift) break; 885504a3fadSHong Zhang pv[nbdiag] = sctx.pv; 886137fb511SHong Zhang } 887137fb511SHong Zhang 88807b50cabSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 889137fb511SHong Zhang /* 890137fb511SHong Zhang * if no shift in this attempt & shifting & started shifting & can refine, 891137fb511SHong Zhang * then try lower shift 892137fb511SHong Zhang */ 8930481f469SBarry Smith sctx.shift_hi = sctx.shift_fraction; 8940481f469SBarry Smith sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 8950481f469SBarry Smith sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 89607b50cabSHong Zhang sctx.newshift = PETSC_TRUE; 897137fb511SHong Zhang sctx.nshift++; 898137fb511SHong Zhang } 89907b50cabSHong Zhang } while (sctx.newshift); 900137fb511SHong Zhang 901a5b23f4aSJose E. Roman /* invert diagonal entries for simpler triangular solves */ 9029371c9d4SSatish Balay for (i = 0; i < n; i++) { a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]]; } 903137fb511SHong Zhang 9049566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 9059566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 9069566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 9072205254eSKarl Rupp 908b051339dSHong Zhang A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 909ad04f41aSHong Zhang A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 910ad04f41aSHong Zhang A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 911ad04f41aSHong Zhang A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 9122205254eSKarl Rupp 913b051339dSHong Zhang A->assembled = PETSC_TRUE; 9145c9eb25fSBarry Smith A->preallocated = PETSC_TRUE; 9152205254eSKarl Rupp 9169566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(A->cmap->n)); 917137fb511SHong Zhang if (sctx.nshift) { 918f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 9199566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 920f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 9219566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 922137fb511SHong Zhang } 923137fb511SHong Zhang } 924137fb511SHong Zhang PetscFunctionReturn(0); 925137fb511SHong Zhang } 926137fb511SHong Zhang 9270a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */ 9289371c9d4SSatish Balay PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info) { 929416022c9SBarry Smith Mat C; 930416022c9SBarry Smith 9313a40ed3dSBarry Smith PetscFunctionBegin; 9329566063dSJacob Faibussowitsch PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C)); 9339566063dSJacob Faibussowitsch PetscCall(MatLUFactorSymbolic(C, A, row, col, info)); 9349566063dSJacob Faibussowitsch PetscCall(MatLUFactorNumeric(C, A, info)); 9352205254eSKarl Rupp 936db4efbfdSBarry Smith A->ops->solve = C->ops->solve; 937db4efbfdSBarry Smith A->ops->solvetranspose = C->ops->solvetranspose; 9382205254eSKarl Rupp 9399566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(A, &C)); 9409566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)((Mat_SeqAIJ *)(A->data))->icol)); 9413a40ed3dSBarry Smith PetscFunctionReturn(0); 942da3a660dSBarry Smith } 9430a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */ 9441d098868SHong Zhang 9459371c9d4SSatish Balay PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) { 946416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 947416022c9SBarry Smith IS iscol = a->col, isrow = a->row; 9485d0c19d7SBarry Smith PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 9495d0c19d7SBarry Smith PetscInt nz; 9505d0c19d7SBarry Smith const PetscInt *rout, *cout, *r, *c; 951dd6ea824SBarry Smith PetscScalar *x, *tmp, *tmps, sum; 952d9fead3dSBarry Smith const PetscScalar *b; 953dd6ea824SBarry Smith const MatScalar *aa = a->a, *v; 9548c37ef55SBarry Smith 9553a40ed3dSBarry Smith PetscFunctionBegin; 9563a40ed3dSBarry Smith if (!n) PetscFunctionReturn(0); 95791bf9adeSBarry Smith 9589566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 9599566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 960416022c9SBarry Smith tmp = a->solve_work; 9618c37ef55SBarry Smith 9629371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 9639371c9d4SSatish Balay r = rout; 9649371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 9659371c9d4SSatish Balay c = cout + (n - 1); 9668c37ef55SBarry Smith 9678c37ef55SBarry Smith /* forward solve the lower triangular */ 9688c37ef55SBarry Smith tmp[0] = b[*r++]; 969010a20caSHong Zhang tmps = tmp; 9708c37ef55SBarry Smith for (i = 1; i < n; i++) { 971010a20caSHong Zhang v = aa + ai[i]; 972010a20caSHong Zhang vi = aj + ai[i]; 97353e7425aSBarry Smith nz = a->diag[i] - ai[i]; 97453e7425aSBarry Smith sum = b[*r++]; 975003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 9768c37ef55SBarry Smith tmp[i] = sum; 9778c37ef55SBarry Smith } 9788c37ef55SBarry Smith 9798c37ef55SBarry Smith /* backward solve the upper triangular */ 9808c37ef55SBarry Smith for (i = n - 1; i >= 0; i--) { 981010a20caSHong Zhang v = aa + a->diag[i] + 1; 982010a20caSHong Zhang vi = aj + a->diag[i] + 1; 983416022c9SBarry Smith nz = ai[i + 1] - a->diag[i] - 1; 9848c37ef55SBarry Smith sum = tmp[i]; 985003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 986010a20caSHong Zhang x[*c--] = tmp[i] = sum * aa[a->diag[i]]; 9878c37ef55SBarry Smith } 9888c37ef55SBarry Smith 9899566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 9909566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 9919566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 9929566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 9939566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 9943a40ed3dSBarry Smith PetscFunctionReturn(0); 9958c37ef55SBarry Smith } 996026e39d0SSatish Balay 9979371c9d4SSatish Balay PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X) { 9982bebee5dSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 9992bebee5dSHong Zhang IS iscol = a->col, isrow = a->row; 10005d0c19d7SBarry Smith PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 1001910cf402Sprj- PetscInt nz, neq, ldb, ldx; 10025d0c19d7SBarry Smith const PetscInt *rout, *cout, *r, *c; 1003910cf402Sprj- PetscScalar *x, *tmp = a->solve_work, *tmps, sum; 1004910cf402Sprj- const PetscScalar *b, *aa = a->a, *v; 1005910cf402Sprj- PetscBool isdense; 10062bebee5dSHong Zhang 10072bebee5dSHong Zhang PetscFunctionBegin; 10082bebee5dSHong Zhang if (!n) PetscFunctionReturn(0); 10099566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 101028b400f6SJacob Faibussowitsch PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1011f9fb9879SHong Zhang if (X != B) { 10129566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 101328b400f6SJacob Faibussowitsch PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1014f9fb9879SHong Zhang } 10159566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &b)); 10169566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(B, &ldb)); 10179566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &x)); 10189566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(X, &ldx)); 10199371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 10209371c9d4SSatish Balay r = rout; 10219371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 10229371c9d4SSatish Balay c = cout; 1023a36b22ccSSatish Balay for (neq = 0; neq < B->cmap->n; neq++) { 10242bebee5dSHong Zhang /* forward solve the lower triangular */ 10252bebee5dSHong Zhang tmp[0] = b[r[0]]; 10262bebee5dSHong Zhang tmps = tmp; 10272bebee5dSHong Zhang for (i = 1; i < n; i++) { 10282bebee5dSHong Zhang v = aa + ai[i]; 10292bebee5dSHong Zhang vi = aj + ai[i]; 10302bebee5dSHong Zhang nz = a->diag[i] - ai[i]; 10312bebee5dSHong Zhang sum = b[r[i]]; 1032003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 10332bebee5dSHong Zhang tmp[i] = sum; 10342bebee5dSHong Zhang } 10352bebee5dSHong Zhang /* backward solve the upper triangular */ 10362bebee5dSHong Zhang for (i = n - 1; i >= 0; i--) { 10372bebee5dSHong Zhang v = aa + a->diag[i] + 1; 10382bebee5dSHong Zhang vi = aj + a->diag[i] + 1; 10392bebee5dSHong Zhang nz = ai[i + 1] - a->diag[i] - 1; 10402bebee5dSHong Zhang sum = tmp[i]; 1041003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 10422bebee5dSHong Zhang x[c[i]] = tmp[i] = sum * aa[a->diag[i]]; 10432bebee5dSHong Zhang } 1044910cf402Sprj- b += ldb; 1045910cf402Sprj- x += ldx; 10462bebee5dSHong Zhang } 10479566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 10489566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 10499566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &b)); 10509566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &x)); 10519566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 10522bebee5dSHong Zhang PetscFunctionReturn(0); 10532bebee5dSHong Zhang } 10542bebee5dSHong Zhang 10559371c9d4SSatish Balay PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X) { 10569bd0847aSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 10579bd0847aSShri Abhyankar IS iscol = a->col, isrow = a->row; 10589bd0847aSShri Abhyankar PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 1059910cf402Sprj- PetscInt nz, neq, ldb, ldx; 10609bd0847aSShri Abhyankar const PetscInt *rout, *cout, *r, *c; 1061910cf402Sprj- PetscScalar *x, *tmp = a->solve_work, sum; 1062910cf402Sprj- const PetscScalar *b, *aa = a->a, *v; 1063910cf402Sprj- PetscBool isdense; 10649bd0847aSShri Abhyankar 10659bd0847aSShri Abhyankar PetscFunctionBegin; 10669bd0847aSShri Abhyankar if (!n) PetscFunctionReturn(0); 10679566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 106828b400f6SJacob Faibussowitsch PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1069f9fb9879SHong Zhang if (X != B) { 10709566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 107128b400f6SJacob Faibussowitsch PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1072f9fb9879SHong Zhang } 10739566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &b)); 10749566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(B, &ldb)); 10759566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &x)); 10769566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(X, &ldx)); 10779371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 10789371c9d4SSatish Balay r = rout; 10799371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 10809371c9d4SSatish Balay c = cout; 10819bd0847aSShri Abhyankar for (neq = 0; neq < B->cmap->n; neq++) { 10829bd0847aSShri Abhyankar /* forward solve the lower triangular */ 10839bd0847aSShri Abhyankar tmp[0] = b[r[0]]; 10849bd0847aSShri Abhyankar v = aa; 10859bd0847aSShri Abhyankar vi = aj; 10869bd0847aSShri Abhyankar for (i = 1; i < n; i++) { 10879bd0847aSShri Abhyankar nz = ai[i + 1] - ai[i]; 10889bd0847aSShri Abhyankar sum = b[r[i]]; 10899bd0847aSShri Abhyankar PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 10909bd0847aSShri Abhyankar tmp[i] = sum; 10919371c9d4SSatish Balay v += nz; 10929371c9d4SSatish Balay vi += nz; 10939bd0847aSShri Abhyankar } 10949bd0847aSShri Abhyankar /* backward solve the upper triangular */ 10959bd0847aSShri Abhyankar for (i = n - 1; i >= 0; i--) { 10969bd0847aSShri Abhyankar v = aa + adiag[i + 1] + 1; 10979bd0847aSShri Abhyankar vi = aj + adiag[i + 1] + 1; 10989bd0847aSShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 10999bd0847aSShri Abhyankar sum = tmp[i]; 11009bd0847aSShri Abhyankar PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 11019bd0847aSShri Abhyankar x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 11029bd0847aSShri Abhyankar } 1103910cf402Sprj- b += ldb; 1104910cf402Sprj- x += ldx; 11059bd0847aSShri Abhyankar } 11069566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 11079566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 11089566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &b)); 11099566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &x)); 11109566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 11119bd0847aSShri Abhyankar PetscFunctionReturn(0); 11129bd0847aSShri Abhyankar } 11139bd0847aSShri Abhyankar 11149371c9d4SSatish Balay PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx) { 1115137fb511SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1116137fb511SHong Zhang IS iscol = a->col, isrow = a->row; 11175d0c19d7SBarry Smith const PetscInt *r, *c, *rout, *cout; 11185d0c19d7SBarry Smith PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 11195d0c19d7SBarry Smith PetscInt nz, row; 1120fdc842d1SBarry Smith PetscScalar *x, *tmp, *tmps, sum; 1121fdc842d1SBarry Smith const PetscScalar *b; 1122dd6ea824SBarry Smith const MatScalar *aa = a->a, *v; 1123137fb511SHong Zhang 1124137fb511SHong Zhang PetscFunctionBegin; 1125137fb511SHong Zhang if (!n) PetscFunctionReturn(0); 1126137fb511SHong Zhang 11279566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 11289566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 1129137fb511SHong Zhang tmp = a->solve_work; 1130137fb511SHong Zhang 11319371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 11329371c9d4SSatish Balay r = rout; 11339371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 11349371c9d4SSatish Balay c = cout + (n - 1); 1135137fb511SHong Zhang 1136137fb511SHong Zhang /* forward solve the lower triangular */ 1137137fb511SHong Zhang tmp[0] = b[*r++]; 1138137fb511SHong Zhang tmps = tmp; 1139137fb511SHong Zhang for (row = 1; row < n; row++) { 1140137fb511SHong Zhang i = rout[row]; /* permuted row */ 1141137fb511SHong Zhang v = aa + ai[i]; 1142137fb511SHong Zhang vi = aj + ai[i]; 1143137fb511SHong Zhang nz = a->diag[i] - ai[i]; 1144137fb511SHong Zhang sum = b[*r++]; 1145003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1146137fb511SHong Zhang tmp[row] = sum; 1147137fb511SHong Zhang } 1148137fb511SHong Zhang 1149137fb511SHong Zhang /* backward solve the upper triangular */ 1150137fb511SHong Zhang for (row = n - 1; row >= 0; row--) { 1151137fb511SHong Zhang i = rout[row]; /* permuted row */ 1152137fb511SHong Zhang v = aa + a->diag[i] + 1; 1153137fb511SHong Zhang vi = aj + a->diag[i] + 1; 1154137fb511SHong Zhang nz = ai[i + 1] - a->diag[i] - 1; 1155137fb511SHong Zhang sum = tmp[row]; 1156003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1157137fb511SHong Zhang x[*c--] = tmp[row] = sum * aa[a->diag[i]]; 1158137fb511SHong Zhang } 1159137fb511SHong Zhang 11609566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 11619566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 11629566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 11639566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 11649566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1165137fb511SHong Zhang PetscFunctionReturn(0); 1166137fb511SHong Zhang } 1167137fb511SHong Zhang 1168930ae53cSBarry Smith /* ----------------------------------------------------------- */ 1169c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 11709371c9d4SSatish Balay PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) { 1171930ae53cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1172d0f46423SBarry Smith PetscInt n = A->rmap->n; 1173b377110cSBarry Smith const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag; 1174356650c2SBarry Smith PetscScalar *x; 1175356650c2SBarry Smith const PetscScalar *b; 1176dd6ea824SBarry Smith const MatScalar *aa = a->a; 1177aa482453SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1178356650c2SBarry Smith PetscInt adiag_i, i, nz, ai_i; 1179b377110cSBarry Smith const PetscInt *vi; 1180dd6ea824SBarry Smith const MatScalar *v; 1181dd6ea824SBarry Smith PetscScalar sum; 1182d85afc42SSatish Balay #endif 1183930ae53cSBarry Smith 11843a40ed3dSBarry Smith PetscFunctionBegin; 11853a40ed3dSBarry Smith if (!n) PetscFunctionReturn(0); 1186930ae53cSBarry Smith 11879566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 11889566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 1189930ae53cSBarry Smith 1190aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 11911c4feecaSSatish Balay fortransolveaij_(&n, x, ai, aj, adiag, aa, b); 11921c4feecaSSatish Balay #else 1193930ae53cSBarry Smith /* forward solve the lower triangular */ 1194930ae53cSBarry Smith x[0] = b[0]; 1195930ae53cSBarry Smith for (i = 1; i < n; i++) { 1196930ae53cSBarry Smith ai_i = ai[i]; 1197930ae53cSBarry Smith v = aa + ai_i; 1198930ae53cSBarry Smith vi = aj + ai_i; 1199930ae53cSBarry Smith nz = adiag[i] - ai_i; 1200930ae53cSBarry Smith sum = b[i]; 1201003131ecSBarry Smith PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1202930ae53cSBarry Smith x[i] = sum; 1203930ae53cSBarry Smith } 1204930ae53cSBarry Smith 1205930ae53cSBarry Smith /* backward solve the upper triangular */ 1206930ae53cSBarry Smith for (i = n - 1; i >= 0; i--) { 1207930ae53cSBarry Smith adiag_i = adiag[i]; 1208d708749eSBarry Smith v = aa + adiag_i + 1; 1209d708749eSBarry Smith vi = aj + adiag_i + 1; 1210930ae53cSBarry Smith nz = ai[i + 1] - adiag_i - 1; 1211930ae53cSBarry Smith sum = x[i]; 1212003131ecSBarry Smith PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1213930ae53cSBarry Smith x[i] = sum * aa[adiag_i]; 1214930ae53cSBarry Smith } 12151c4feecaSSatish Balay #endif 12169566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 12179566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 12189566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 12193a40ed3dSBarry Smith PetscFunctionReturn(0); 1220930ae53cSBarry Smith } 1221930ae53cSBarry Smith 12229371c9d4SSatish Balay PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx) { 1223416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1224416022c9SBarry Smith IS iscol = a->col, isrow = a->row; 122504fbf559SBarry Smith PetscInt i, n = A->rmap->n, j; 12265d0c19d7SBarry Smith PetscInt nz; 122704fbf559SBarry Smith const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j; 122804fbf559SBarry Smith PetscScalar *x, *tmp, sum; 122904fbf559SBarry Smith const PetscScalar *b; 1230dd6ea824SBarry Smith const MatScalar *aa = a->a, *v; 1231da3a660dSBarry Smith 12323a40ed3dSBarry Smith PetscFunctionBegin; 12339566063dSJacob Faibussowitsch if (yy != xx) PetscCall(VecCopy(yy, xx)); 1234da3a660dSBarry Smith 12359566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 12369566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 1237416022c9SBarry Smith tmp = a->solve_work; 1238da3a660dSBarry Smith 12399371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 12409371c9d4SSatish Balay r = rout; 12419371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 12429371c9d4SSatish Balay c = cout + (n - 1); 1243da3a660dSBarry Smith 1244da3a660dSBarry Smith /* forward solve the lower triangular */ 1245da3a660dSBarry Smith tmp[0] = b[*r++]; 1246da3a660dSBarry Smith for (i = 1; i < n; i++) { 1247010a20caSHong Zhang v = aa + ai[i]; 1248010a20caSHong Zhang vi = aj + ai[i]; 1249416022c9SBarry Smith nz = a->diag[i] - ai[i]; 1250da3a660dSBarry Smith sum = b[*r++]; 125104fbf559SBarry Smith for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1252da3a660dSBarry Smith tmp[i] = sum; 1253da3a660dSBarry Smith } 1254da3a660dSBarry Smith 1255da3a660dSBarry Smith /* backward solve the upper triangular */ 1256da3a660dSBarry Smith for (i = n - 1; i >= 0; i--) { 1257010a20caSHong Zhang v = aa + a->diag[i] + 1; 1258010a20caSHong Zhang vi = aj + a->diag[i] + 1; 1259416022c9SBarry Smith nz = ai[i + 1] - a->diag[i] - 1; 1260da3a660dSBarry Smith sum = tmp[i]; 126104fbf559SBarry Smith for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1262010a20caSHong Zhang tmp[i] = sum * aa[a->diag[i]]; 1263da3a660dSBarry Smith x[*c--] += tmp[i]; 1264da3a660dSBarry Smith } 1265da3a660dSBarry Smith 12669566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 12679566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 12689566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 12699566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 12709566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz)); 12713a40ed3dSBarry Smith PetscFunctionReturn(0); 1272da3a660dSBarry Smith } 127304fbf559SBarry Smith 12749371c9d4SSatish Balay PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx) { 12753c0119dfSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 12763c0119dfSShri Abhyankar IS iscol = a->col, isrow = a->row; 12773c0119dfSShri Abhyankar PetscInt i, n = A->rmap->n, j; 12783c0119dfSShri Abhyankar PetscInt nz; 12793c0119dfSShri Abhyankar const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 12803c0119dfSShri Abhyankar PetscScalar *x, *tmp, sum; 12813c0119dfSShri Abhyankar const PetscScalar *b; 12823c0119dfSShri Abhyankar const MatScalar *aa = a->a, *v; 12833c0119dfSShri Abhyankar 12843c0119dfSShri Abhyankar PetscFunctionBegin; 12859566063dSJacob Faibussowitsch if (yy != xx) PetscCall(VecCopy(yy, xx)); 12863c0119dfSShri Abhyankar 12879566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 12889566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 12893c0119dfSShri Abhyankar tmp = a->solve_work; 12903c0119dfSShri Abhyankar 12919371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 12929371c9d4SSatish Balay r = rout; 12939371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 12949371c9d4SSatish Balay c = cout; 12953c0119dfSShri Abhyankar 12963c0119dfSShri Abhyankar /* forward solve the lower triangular */ 12973c0119dfSShri Abhyankar tmp[0] = b[r[0]]; 12983c0119dfSShri Abhyankar v = aa; 12993c0119dfSShri Abhyankar vi = aj; 13003c0119dfSShri Abhyankar for (i = 1; i < n; i++) { 13013c0119dfSShri Abhyankar nz = ai[i + 1] - ai[i]; 13023c0119dfSShri Abhyankar sum = b[r[i]]; 13033c0119dfSShri Abhyankar for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 13043c0119dfSShri Abhyankar tmp[i] = sum; 13052205254eSKarl Rupp v += nz; 13062205254eSKarl Rupp vi += nz; 13073c0119dfSShri Abhyankar } 13083c0119dfSShri Abhyankar 13093c0119dfSShri Abhyankar /* backward solve the upper triangular */ 13103c0119dfSShri Abhyankar v = aa + adiag[n - 1]; 13113c0119dfSShri Abhyankar vi = aj + adiag[n - 1]; 13123c0119dfSShri Abhyankar for (i = n - 1; i >= 0; i--) { 13133c0119dfSShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 13143c0119dfSShri Abhyankar sum = tmp[i]; 13153c0119dfSShri Abhyankar for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 13163c0119dfSShri Abhyankar tmp[i] = sum * v[nz]; 13173c0119dfSShri Abhyankar x[c[i]] += tmp[i]; 13189371c9d4SSatish Balay v += nz + 1; 13199371c9d4SSatish Balay vi += nz + 1; 13203c0119dfSShri Abhyankar } 13213c0119dfSShri Abhyankar 13229566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 13239566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 13249566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 13259566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 13269566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz)); 13273c0119dfSShri Abhyankar PetscFunctionReturn(0); 13283c0119dfSShri Abhyankar } 13293c0119dfSShri Abhyankar 13309371c9d4SSatish Balay PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) { 1331416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 13322235e667SBarry Smith IS iscol = a->col, isrow = a->row; 133304fbf559SBarry Smith const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 133404fbf559SBarry Smith PetscInt i, n = A->rmap->n, j; 133504fbf559SBarry Smith PetscInt nz; 133604fbf559SBarry Smith PetscScalar *x, *tmp, s1; 1337dd6ea824SBarry Smith const MatScalar *aa = a->a, *v; 133804fbf559SBarry Smith const PetscScalar *b; 1339da3a660dSBarry Smith 13403a40ed3dSBarry Smith PetscFunctionBegin; 13419566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 13429566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 1343416022c9SBarry Smith tmp = a->solve_work; 1344da3a660dSBarry Smith 13459371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 13469371c9d4SSatish Balay r = rout; 13479371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 13489371c9d4SSatish Balay c = cout; 1349da3a660dSBarry Smith 1350da3a660dSBarry Smith /* copy the b into temp work space according to permutation */ 13512235e667SBarry Smith for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1352da3a660dSBarry Smith 1353da3a660dSBarry Smith /* forward solve the U^T */ 1354da3a660dSBarry Smith for (i = 0; i < n; i++) { 1355010a20caSHong Zhang v = aa + diag[i]; 1356010a20caSHong Zhang vi = aj + diag[i] + 1; 1357f1af5d2fSBarry Smith nz = ai[i + 1] - diag[i] - 1; 1358c38d4ed2SBarry Smith s1 = tmp[i]; 1359ef66eb69SBarry Smith s1 *= (*v++); /* multiply by inverse of diagonal entry */ 136004fbf559SBarry Smith for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1361c38d4ed2SBarry Smith tmp[i] = s1; 1362da3a660dSBarry Smith } 1363da3a660dSBarry Smith 1364da3a660dSBarry Smith /* backward solve the L^T */ 1365da3a660dSBarry Smith for (i = n - 1; i >= 0; i--) { 1366010a20caSHong Zhang v = aa + diag[i] - 1; 1367010a20caSHong Zhang vi = aj + diag[i] - 1; 1368f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 1369f1af5d2fSBarry Smith s1 = tmp[i]; 137004fbf559SBarry Smith for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1371da3a660dSBarry Smith } 1372da3a660dSBarry Smith 1373da3a660dSBarry Smith /* copy tmp into x according to permutation */ 1374591294e4SBarry Smith for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1375da3a660dSBarry Smith 13769566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 13779566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 13789566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 13799566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 1380da3a660dSBarry Smith 13819566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 13823a40ed3dSBarry Smith PetscFunctionReturn(0); 1383da3a660dSBarry Smith } 1384da3a660dSBarry Smith 13859371c9d4SSatish Balay PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx) { 1386d1fa9404SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1387d1fa9404SShri Abhyankar IS iscol = a->col, isrow = a->row; 1388d1fa9404SShri Abhyankar const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1389d1fa9404SShri Abhyankar PetscInt i, n = A->rmap->n, j; 1390d1fa9404SShri Abhyankar PetscInt nz; 1391d1fa9404SShri Abhyankar PetscScalar *x, *tmp, s1; 1392d1fa9404SShri Abhyankar const MatScalar *aa = a->a, *v; 1393d1fa9404SShri Abhyankar const PetscScalar *b; 1394d1fa9404SShri Abhyankar 1395d1fa9404SShri Abhyankar PetscFunctionBegin; 13969566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 13979566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 1398d1fa9404SShri Abhyankar tmp = a->solve_work; 1399d1fa9404SShri Abhyankar 14009371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 14019371c9d4SSatish Balay r = rout; 14029371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 14039371c9d4SSatish Balay c = cout; 1404d1fa9404SShri Abhyankar 1405d1fa9404SShri Abhyankar /* copy the b into temp work space according to permutation */ 1406d1fa9404SShri Abhyankar for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1407d1fa9404SShri Abhyankar 1408d1fa9404SShri Abhyankar /* forward solve the U^T */ 1409d1fa9404SShri Abhyankar for (i = 0; i < n; i++) { 1410d1fa9404SShri Abhyankar v = aa + adiag[i + 1] + 1; 1411d1fa9404SShri Abhyankar vi = aj + adiag[i + 1] + 1; 1412d1fa9404SShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 1413d1fa9404SShri Abhyankar s1 = tmp[i]; 1414d1fa9404SShri Abhyankar s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1415d1fa9404SShri Abhyankar for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1416d1fa9404SShri Abhyankar tmp[i] = s1; 1417d1fa9404SShri Abhyankar } 1418d1fa9404SShri Abhyankar 1419d1fa9404SShri Abhyankar /* backward solve the L^T */ 1420d1fa9404SShri Abhyankar for (i = n - 1; i >= 0; i--) { 1421d1fa9404SShri Abhyankar v = aa + ai[i]; 1422d1fa9404SShri Abhyankar vi = aj + ai[i]; 1423d1fa9404SShri Abhyankar nz = ai[i + 1] - ai[i]; 1424d1fa9404SShri Abhyankar s1 = tmp[i]; 1425d1fa9404SShri Abhyankar for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1426d1fa9404SShri Abhyankar } 1427d1fa9404SShri Abhyankar 1428d1fa9404SShri Abhyankar /* copy tmp into x according to permutation */ 1429d1fa9404SShri Abhyankar for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1430d1fa9404SShri Abhyankar 14319566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 14329566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 14339566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 14349566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 1435d1fa9404SShri Abhyankar 14369566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1437d1fa9404SShri Abhyankar PetscFunctionReturn(0); 1438d1fa9404SShri Abhyankar } 1439d1fa9404SShri Abhyankar 14409371c9d4SSatish Balay PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx) { 1441416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 14422235e667SBarry Smith IS iscol = a->col, isrow = a->row; 144304fbf559SBarry Smith const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 144404fbf559SBarry Smith PetscInt i, n = A->rmap->n, j; 144504fbf559SBarry Smith PetscInt nz; 144604fbf559SBarry Smith PetscScalar *x, *tmp, s1; 1447dd6ea824SBarry Smith const MatScalar *aa = a->a, *v; 144804fbf559SBarry Smith const PetscScalar *b; 14496abc6512SBarry Smith 14503a40ed3dSBarry Smith PetscFunctionBegin; 14519566063dSJacob Faibussowitsch if (zz != xx) PetscCall(VecCopy(zz, xx)); 14529566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 14539566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 1454416022c9SBarry Smith tmp = a->solve_work; 14556abc6512SBarry Smith 14569371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 14579371c9d4SSatish Balay r = rout; 14589371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 14599371c9d4SSatish Balay c = cout; 14606abc6512SBarry Smith 14616abc6512SBarry Smith /* copy the b into temp work space according to permutation */ 14622235e667SBarry Smith for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 14636abc6512SBarry Smith 14646abc6512SBarry Smith /* forward solve the U^T */ 14656abc6512SBarry Smith for (i = 0; i < n; i++) { 1466010a20caSHong Zhang v = aa + diag[i]; 1467010a20caSHong Zhang vi = aj + diag[i] + 1; 1468f1af5d2fSBarry Smith nz = ai[i + 1] - diag[i] - 1; 146904fbf559SBarry Smith s1 = tmp[i]; 147004fbf559SBarry Smith s1 *= (*v++); /* multiply by inverse of diagonal entry */ 147104fbf559SBarry Smith for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 147204fbf559SBarry Smith tmp[i] = s1; 14736abc6512SBarry Smith } 14746abc6512SBarry Smith 14756abc6512SBarry Smith /* backward solve the L^T */ 14766abc6512SBarry Smith for (i = n - 1; i >= 0; i--) { 1477010a20caSHong Zhang v = aa + diag[i] - 1; 1478010a20caSHong Zhang vi = aj + diag[i] - 1; 1479f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 148004fbf559SBarry Smith s1 = tmp[i]; 148104fbf559SBarry Smith for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 14826abc6512SBarry Smith } 14836abc6512SBarry Smith 14846abc6512SBarry Smith /* copy tmp into x according to permutation */ 14856abc6512SBarry Smith for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 14866abc6512SBarry Smith 14879566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 14889566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 14899566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 14909566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 14916abc6512SBarry Smith 14929566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 14933a40ed3dSBarry Smith PetscFunctionReturn(0); 1494da3a660dSBarry Smith } 149504fbf559SBarry Smith 14969371c9d4SSatish Balay PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx) { 1497533e6011SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1498533e6011SShri Abhyankar IS iscol = a->col, isrow = a->row; 1499533e6011SShri Abhyankar const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1500533e6011SShri Abhyankar PetscInt i, n = A->rmap->n, j; 1501533e6011SShri Abhyankar PetscInt nz; 1502533e6011SShri Abhyankar PetscScalar *x, *tmp, s1; 1503533e6011SShri Abhyankar const MatScalar *aa = a->a, *v; 1504533e6011SShri Abhyankar const PetscScalar *b; 1505533e6011SShri Abhyankar 1506533e6011SShri Abhyankar PetscFunctionBegin; 15079566063dSJacob Faibussowitsch if (zz != xx) PetscCall(VecCopy(zz, xx)); 15089566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 15099566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 1510533e6011SShri Abhyankar tmp = a->solve_work; 1511533e6011SShri Abhyankar 15129371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 15139371c9d4SSatish Balay r = rout; 15149371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 15159371c9d4SSatish Balay c = cout; 1516533e6011SShri Abhyankar 1517533e6011SShri Abhyankar /* copy the b into temp work space according to permutation */ 1518533e6011SShri Abhyankar for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1519533e6011SShri Abhyankar 1520533e6011SShri Abhyankar /* forward solve the U^T */ 1521533e6011SShri Abhyankar for (i = 0; i < n; i++) { 1522533e6011SShri Abhyankar v = aa + adiag[i + 1] + 1; 1523533e6011SShri Abhyankar vi = aj + adiag[i + 1] + 1; 1524533e6011SShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 1525533e6011SShri Abhyankar s1 = tmp[i]; 1526533e6011SShri Abhyankar s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1527533e6011SShri Abhyankar for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1528533e6011SShri Abhyankar tmp[i] = s1; 1529533e6011SShri Abhyankar } 1530533e6011SShri Abhyankar 1531533e6011SShri Abhyankar /* backward solve the L^T */ 1532533e6011SShri Abhyankar for (i = n - 1; i >= 0; i--) { 1533533e6011SShri Abhyankar v = aa + ai[i]; 1534533e6011SShri Abhyankar vi = aj + ai[i]; 1535533e6011SShri Abhyankar nz = ai[i + 1] - ai[i]; 1536533e6011SShri Abhyankar s1 = tmp[i]; 1537c5e3b2a3SShri Abhyankar for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1538533e6011SShri Abhyankar } 1539533e6011SShri Abhyankar 1540533e6011SShri Abhyankar /* copy tmp into x according to permutation */ 1541533e6011SShri Abhyankar for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1542533e6011SShri Abhyankar 15439566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 15449566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 15459566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 15469566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 1547533e6011SShri Abhyankar 15489566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1549533e6011SShri Abhyankar PetscFunctionReturn(0); 1550533e6011SShri Abhyankar } 1551533e6011SShri Abhyankar 15529e25ed09SBarry Smith /* ----------------------------------------------------------------*/ 1553b47f5a65SHong Zhang 15548fc3a347SHong Zhang /* 15558a76ed4fSHong Zhang ilu() under revised new data structure. 1556813a1f2bSShri Abhyankar Factored arrays bj and ba are stored as 1557813a1f2bSShri Abhyankar L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1558813a1f2bSShri Abhyankar 1559813a1f2bSShri Abhyankar bi=fact->i is an array of size n+1, in which 1560baabb860SHong Zhang bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1561baabb860SHong Zhang bi[n]: points to L(n-1,n-1)+1 1562baabb860SHong Zhang 1563813a1f2bSShri Abhyankar bdiag=fact->diag is an array of size n+1,in which 1564baabb860SHong Zhang bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1565a24f213cSHong Zhang bdiag[n]: points to entry of U(n-1,0)-1 1566813a1f2bSShri Abhyankar 1567813a1f2bSShri Abhyankar U(i,:) contains bdiag[i] as its last entry, i.e., 1568813a1f2bSShri Abhyankar U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1569813a1f2bSShri Abhyankar */ 15709371c9d4SSatish Balay PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) { 1571813a1f2bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1572bbd65245SShri Abhyankar const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag; 1573bbd65245SShri Abhyankar PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag; 15741df811f5SHong Zhang IS isicol; 1575813a1f2bSShri Abhyankar 1576813a1f2bSShri Abhyankar PetscFunctionBegin; 15779566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 15789566063dSJacob Faibussowitsch PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE)); 1579813a1f2bSShri Abhyankar b = (Mat_SeqAIJ *)(fact)->data; 1580813a1f2bSShri Abhyankar 1581813a1f2bSShri Abhyankar /* allocate matrix arrays for new data structure */ 15829566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i)); 15839566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)fact, ai[n] * (sizeof(PetscScalar) + sizeof(PetscInt)) + (n + 1) * sizeof(PetscInt))); 15842205254eSKarl Rupp 1585813a1f2bSShri Abhyankar b->singlemalloc = PETSC_TRUE; 1586813a1f2bSShri Abhyankar if (!b->diag) { 15879566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &b->diag)); 15889566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)fact, (n + 1) * sizeof(PetscInt))); 1589813a1f2bSShri Abhyankar } 1590813a1f2bSShri Abhyankar bdiag = b->diag; 1591813a1f2bSShri Abhyankar 1592*48a46eb9SPierre Jolivet if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n])); 1593813a1f2bSShri Abhyankar 1594813a1f2bSShri Abhyankar /* set bi and bj with new data structure */ 1595813a1f2bSShri Abhyankar bi = b->i; 1596813a1f2bSShri Abhyankar bj = b->j; 1597813a1f2bSShri Abhyankar 1598813a1f2bSShri Abhyankar /* L part */ 1599e60cf9a0SBarry Smith bi[0] = 0; 1600813a1f2bSShri Abhyankar for (i = 0; i < n; i++) { 1601813a1f2bSShri Abhyankar nz = adiag[i] - ai[i]; 1602813a1f2bSShri Abhyankar bi[i + 1] = bi[i] + nz; 1603813a1f2bSShri Abhyankar aj = a->j + ai[i]; 16049371c9d4SSatish Balay for (j = 0; j < nz; j++) { bj[k++] = aj[j]; } 1605813a1f2bSShri Abhyankar } 1606813a1f2bSShri Abhyankar 1607813a1f2bSShri Abhyankar /* U part */ 160862fbd6afSShri Abhyankar bdiag[n] = bi[n] - 1; 1609813a1f2bSShri Abhyankar for (i = n - 1; i >= 0; i--) { 1610813a1f2bSShri Abhyankar nz = ai[i + 1] - adiag[i] - 1; 1611813a1f2bSShri Abhyankar aj = a->j + adiag[i] + 1; 16129371c9d4SSatish Balay for (j = 0; j < nz; j++) { bj[k++] = aj[j]; } 1613813a1f2bSShri Abhyankar /* diag[i] */ 1614bbd65245SShri Abhyankar bj[k++] = i; 1615a24f213cSHong Zhang bdiag[i] = bdiag[i + 1] + nz + 1; 1616813a1f2bSShri Abhyankar } 16171df811f5SHong Zhang 1618d5f3da31SBarry Smith fact->factortype = MAT_FACTOR_ILU; 16191df811f5SHong Zhang fact->info.factor_mallocs = 0; 16201df811f5SHong Zhang fact->info.fill_ratio_given = info->fill; 16211df811f5SHong Zhang fact->info.fill_ratio_needed = 1.0; 162235233d99SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 16239566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 16241df811f5SHong Zhang 16251df811f5SHong Zhang b = (Mat_SeqAIJ *)(fact)->data; 16261df811f5SHong Zhang b->row = isrow; 16271df811f5SHong Zhang b->col = iscol; 16281df811f5SHong Zhang b->icol = isicol; 16299566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work)); 16309566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 16319566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 1632813a1f2bSShri Abhyankar PetscFunctionReturn(0); 1633813a1f2bSShri Abhyankar } 1634813a1f2bSShri Abhyankar 16359371c9d4SSatish Balay PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) { 1636813a1f2bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1637813a1f2bSShri Abhyankar IS isicol; 1638813a1f2bSShri Abhyankar const PetscInt *r, *ic; 16391df811f5SHong Zhang PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1640813a1f2bSShri Abhyankar PetscInt *bi, *cols, nnz, *cols_lvl; 1641813a1f2bSShri Abhyankar PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1642813a1f2bSShri Abhyankar PetscInt i, levels, diagonal_fill; 1643035f4963SHong Zhang PetscBool col_identity, row_identity, missing; 1644813a1f2bSShri Abhyankar PetscReal f; 16450298fd71SBarry Smith PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1646813a1f2bSShri Abhyankar PetscBT lnkbt; 1647813a1f2bSShri Abhyankar PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 16480298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 16490298fd71SBarry Smith PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1650813a1f2bSShri Abhyankar 1651813a1f2bSShri Abhyankar PetscFunctionBegin; 165208401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 16539566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 165428b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1655ad04f41aSHong Zhang 1656813a1f2bSShri Abhyankar levels = (PetscInt)info->levels; 16579566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 16589566063dSJacob Faibussowitsch PetscCall(ISIdentity(iscol, &col_identity)); 1659813a1f2bSShri Abhyankar if (!levels && row_identity && col_identity) { 1660813a1f2bSShri Abhyankar /* special case: ilu(0) with natural ordering */ 16619566063dSJacob Faibussowitsch PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info)); 16629371c9d4SSatish Balay if (a->inode.size) { fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; } 1663813a1f2bSShri Abhyankar PetscFunctionReturn(0); 1664813a1f2bSShri Abhyankar } 1665813a1f2bSShri Abhyankar 16669566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 16679566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 16689566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 1669813a1f2bSShri Abhyankar 16701bfa06eaSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 16719566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bi)); 16729566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 1673e60cf9a0SBarry Smith bi[0] = bdiag[0] = 0; 16749566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1675813a1f2bSShri Abhyankar 1676813a1f2bSShri Abhyankar /* create a linked list for storing column indices of the active row */ 1677813a1f2bSShri Abhyankar nlnk = n + 1; 16789566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1679813a1f2bSShri Abhyankar 1680813a1f2bSShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 16811df811f5SHong Zhang f = info->fill; 16821df811f5SHong Zhang diagonal_fill = (PetscInt)info->diagonal_fill; 16839566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1684813a1f2bSShri Abhyankar current_space = free_space; 16859566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1686813a1f2bSShri Abhyankar current_space_lvl = free_space_lvl; 1687813a1f2bSShri Abhyankar for (i = 0; i < n; i++) { 1688813a1f2bSShri Abhyankar nzi = 0; 1689813a1f2bSShri Abhyankar /* copy current row into linked list */ 1690813a1f2bSShri Abhyankar nnz = ai[r[i] + 1] - ai[r[i]]; 169128b400f6SJacob Faibussowitsch PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1692813a1f2bSShri Abhyankar cols = aj + ai[r[i]]; 1693813a1f2bSShri Abhyankar lnk[i] = -1; /* marker to indicate if diagonal exists */ 16949566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1695813a1f2bSShri Abhyankar nzi += nlnk; 1696813a1f2bSShri Abhyankar 1697813a1f2bSShri Abhyankar /* make sure diagonal entry is included */ 1698813a1f2bSShri Abhyankar if (diagonal_fill && lnk[i] == -1) { 1699813a1f2bSShri Abhyankar fm = n; 1700813a1f2bSShri Abhyankar while (lnk[fm] < i) fm = lnk[fm]; 1701813a1f2bSShri Abhyankar lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1702813a1f2bSShri Abhyankar lnk[fm] = i; 1703813a1f2bSShri Abhyankar lnk_lvl[i] = 0; 17049371c9d4SSatish Balay nzi++; 17059371c9d4SSatish Balay dcount++; 1706813a1f2bSShri Abhyankar } 1707813a1f2bSShri Abhyankar 1708813a1f2bSShri Abhyankar /* add pivot rows into the active row */ 1709813a1f2bSShri Abhyankar nzbd = 0; 1710813a1f2bSShri Abhyankar prow = lnk[n]; 1711813a1f2bSShri Abhyankar while (prow < i) { 1712813a1f2bSShri Abhyankar nnz = bdiag[prow]; 1713813a1f2bSShri Abhyankar cols = bj_ptr[prow] + nnz + 1; 1714813a1f2bSShri Abhyankar cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1715813a1f2bSShri Abhyankar nnz = bi[prow + 1] - bi[prow] - nnz - 1; 17169566063dSJacob Faibussowitsch PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1717813a1f2bSShri Abhyankar nzi += nlnk; 1718813a1f2bSShri Abhyankar prow = lnk[prow]; 1719813a1f2bSShri Abhyankar nzbd++; 1720813a1f2bSShri Abhyankar } 1721813a1f2bSShri Abhyankar bdiag[i] = nzbd; 1722813a1f2bSShri Abhyankar bi[i + 1] = bi[i] + nzi; 1723813a1f2bSShri Abhyankar /* if free space is not available, make more free space */ 1724813a1f2bSShri Abhyankar if (current_space->local_remaining < nzi) { 1725f91af8c7SBarry Smith nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */ 17269566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 17279566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1728813a1f2bSShri Abhyankar reallocs++; 1729813a1f2bSShri Abhyankar } 1730813a1f2bSShri Abhyankar 1731813a1f2bSShri Abhyankar /* copy data into free_space and free_space_lvl, then initialize lnk */ 17329566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 1733813a1f2bSShri Abhyankar bj_ptr[i] = current_space->array; 1734813a1f2bSShri Abhyankar bjlvl_ptr[i] = current_space_lvl->array; 1735813a1f2bSShri Abhyankar 1736813a1f2bSShri Abhyankar /* make sure the active row i has diagonal entry */ 173708401ef6SPierre Jolivet PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 1738813a1f2bSShri Abhyankar 1739813a1f2bSShri Abhyankar current_space->array += nzi; 1740813a1f2bSShri Abhyankar current_space->local_used += nzi; 1741813a1f2bSShri Abhyankar current_space->local_remaining -= nzi; 1742813a1f2bSShri Abhyankar current_space_lvl->array += nzi; 1743813a1f2bSShri Abhyankar current_space_lvl->local_used += nzi; 1744813a1f2bSShri Abhyankar current_space_lvl->local_remaining -= nzi; 1745813a1f2bSShri Abhyankar } 1746813a1f2bSShri Abhyankar 17479566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 17489566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 1749813a1f2bSShri Abhyankar /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 17509566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 17519566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 1752813a1f2bSShri Abhyankar 17539566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 17549566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 17559566063dSJacob Faibussowitsch PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 1756813a1f2bSShri Abhyankar 1757813a1f2bSShri Abhyankar #if defined(PETSC_USE_INFO) 1758813a1f2bSShri Abhyankar { 1759aef85c9fSShri Abhyankar PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 17609566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 17619566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 17629566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 17639566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 1764*48a46eb9SPierre Jolivet if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 1765813a1f2bSShri Abhyankar } 1766813a1f2bSShri Abhyankar #endif 1767813a1f2bSShri Abhyankar /* put together the new matrix */ 17689566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 17699566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)fact, (PetscObject)isicol)); 1770813a1f2bSShri Abhyankar b = (Mat_SeqAIJ *)(fact)->data; 17712205254eSKarl Rupp 1772813a1f2bSShri Abhyankar b->free_a = PETSC_TRUE; 1773813a1f2bSShri Abhyankar b->free_ij = PETSC_TRUE; 1774813a1f2bSShri Abhyankar b->singlemalloc = PETSC_FALSE; 17752205254eSKarl Rupp 17769566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a)); 17772205254eSKarl Rupp 1778813a1f2bSShri Abhyankar b->j = bj; 1779813a1f2bSShri Abhyankar b->i = bi; 1780813a1f2bSShri Abhyankar b->diag = bdiag; 1781f4259b30SLisandro Dalcin b->ilen = NULL; 1782f4259b30SLisandro Dalcin b->imax = NULL; 1783813a1f2bSShri Abhyankar b->row = isrow; 1784813a1f2bSShri Abhyankar b->col = iscol; 17859566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 17869566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 1787813a1f2bSShri Abhyankar b->icol = isicol; 17882205254eSKarl Rupp 17899566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 1790813a1f2bSShri Abhyankar /* In b structure: Free imax, ilen, old a, old j. 1791813a1f2bSShri Abhyankar Allocate bdiag, solve_work, new a, new j */ 17929566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)fact, (bdiag[0] + 1) * (sizeof(PetscInt) + sizeof(PetscScalar)))); 1793f268cba8SShri Abhyankar b->maxnz = b->nz = bdiag[0] + 1; 17942205254eSKarl Rupp 1795813a1f2bSShri Abhyankar (fact)->info.factor_mallocs = reallocs; 1796813a1f2bSShri Abhyankar (fact)->info.fill_ratio_given = f; 1797f268cba8SShri Abhyankar (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 179835233d99SShri Abhyankar (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 17999371c9d4SSatish Balay if (a->inode.size) { (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; } 18009566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1801813a1f2bSShri Abhyankar PetscFunctionReturn(0); 1802813a1f2bSShri Abhyankar } 1803813a1f2bSShri Abhyankar 18049371c9d4SSatish Balay PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) { 18056516239fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 18066516239fSHong Zhang IS isicol; 18076516239fSHong Zhang const PetscInt *r, *ic; 1808ece542f9SHong Zhang PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 18096516239fSHong Zhang PetscInt *bi, *cols, nnz, *cols_lvl; 18106516239fSHong Zhang PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 18116516239fSHong Zhang PetscInt i, levels, diagonal_fill; 1812ace3abfcSBarry Smith PetscBool col_identity, row_identity; 18136516239fSHong Zhang PetscReal f; 18140298fd71SBarry Smith PetscInt nlnk, *lnk, *lnk_lvl = NULL; 18156516239fSHong Zhang PetscBT lnkbt; 18166516239fSHong Zhang PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 18170298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 18180298fd71SBarry Smith PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1819ace3abfcSBarry Smith PetscBool missing; 18206516239fSHong Zhang 18216516239fSHong Zhang PetscFunctionBegin; 182208401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 18239566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 182428b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1825ece542f9SHong Zhang 18266516239fSHong Zhang f = info->fill; 18276516239fSHong Zhang levels = (PetscInt)info->levels; 18286516239fSHong Zhang diagonal_fill = (PetscInt)info->diagonal_fill; 18292205254eSKarl Rupp 18309566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 18316516239fSHong Zhang 18329566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 18339566063dSJacob Faibussowitsch PetscCall(ISIdentity(iscol, &col_identity)); 18346516239fSHong Zhang if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 18359566063dSJacob Faibussowitsch PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE)); 18362205254eSKarl Rupp 1837ad04f41aSHong Zhang (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 18389371c9d4SSatish Balay if (a->inode.size) { (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; } 1839d5f3da31SBarry Smith fact->factortype = MAT_FACTOR_ILU; 1840719d5645SBarry Smith (fact)->info.factor_mallocs = 0; 1841719d5645SBarry Smith (fact)->info.fill_ratio_given = info->fill; 1842719d5645SBarry Smith (fact)->info.fill_ratio_needed = 1.0; 18432205254eSKarl Rupp 1844719d5645SBarry Smith b = (Mat_SeqAIJ *)(fact)->data; 184508480c60SBarry Smith b->row = isrow; 184608480c60SBarry Smith b->col = iscol; 184782bf6240SBarry Smith b->icol = isicol; 18489566063dSJacob Faibussowitsch PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work)); 18499566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 18509566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 18513a40ed3dSBarry Smith PetscFunctionReturn(0); 185208480c60SBarry Smith } 185308480c60SBarry Smith 18549566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 18559566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 18569e25ed09SBarry Smith 18571bfa06eaSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 18589566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bi)); 18599566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 1860a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 18616cf997b0SBarry Smith 18629566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 18635a8e39fbSHong Zhang 18645a8e39fbSHong Zhang /* create a linked list for storing column indices of the active row */ 18655a8e39fbSHong Zhang nlnk = n + 1; 18669566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 18675a8e39fbSHong Zhang 18685a8e39fbSHong Zhang /* initial FreeSpace size is f*(ai[n]+1) */ 18699566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 18705a8e39fbSHong Zhang current_space = free_space; 18719566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 18725a8e39fbSHong Zhang current_space_lvl = free_space_lvl; 18735a8e39fbSHong Zhang 18745a8e39fbSHong Zhang for (i = 0; i < n; i++) { 18755a8e39fbSHong Zhang nzi = 0; 18766cf997b0SBarry Smith /* copy current row into linked list */ 18775a8e39fbSHong Zhang nnz = ai[r[i] + 1] - ai[r[i]]; 187828b400f6SJacob Faibussowitsch PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 18795a8e39fbSHong Zhang cols = aj + ai[r[i]]; 18805a8e39fbSHong Zhang lnk[i] = -1; /* marker to indicate if diagonal exists */ 18819566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 18825a8e39fbSHong Zhang nzi += nlnk; 18836cf997b0SBarry Smith 18846cf997b0SBarry Smith /* make sure diagonal entry is included */ 18855a8e39fbSHong Zhang if (diagonal_fill && lnk[i] == -1) { 18866cf997b0SBarry Smith fm = n; 18875a8e39fbSHong Zhang while (lnk[fm] < i) fm = lnk[fm]; 18885a8e39fbSHong Zhang lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 18895a8e39fbSHong Zhang lnk[fm] = i; 18905a8e39fbSHong Zhang lnk_lvl[i] = 0; 18919371c9d4SSatish Balay nzi++; 18929371c9d4SSatish Balay dcount++; 18936cf997b0SBarry Smith } 18946cf997b0SBarry Smith 18955a8e39fbSHong Zhang /* add pivot rows into the active row */ 18965a8e39fbSHong Zhang nzbd = 0; 18975a8e39fbSHong Zhang prow = lnk[n]; 18985a8e39fbSHong Zhang while (prow < i) { 18995a8e39fbSHong Zhang nnz = bdiag[prow]; 19005a8e39fbSHong Zhang cols = bj_ptr[prow] + nnz + 1; 19015a8e39fbSHong Zhang cols_lvl = bjlvl_ptr[prow] + nnz + 1; 19025a8e39fbSHong Zhang nnz = bi[prow + 1] - bi[prow] - nnz - 1; 19039566063dSJacob Faibussowitsch PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 19045a8e39fbSHong Zhang nzi += nlnk; 19055a8e39fbSHong Zhang prow = lnk[prow]; 19065a8e39fbSHong Zhang nzbd++; 190756beaf04SBarry Smith } 19085a8e39fbSHong Zhang bdiag[i] = nzbd; 19095a8e39fbSHong Zhang bi[i + 1] = bi[i] + nzi; 1910ecf371e4SBarry Smith 19115a8e39fbSHong Zhang /* if free space is not available, make more free space */ 19125a8e39fbSHong Zhang if (current_space->local_remaining < nzi) { 1913f91af8c7SBarry Smith nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */ 19149566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 19159566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 19165a8e39fbSHong Zhang reallocs++; 19175a8e39fbSHong Zhang } 1918ecf371e4SBarry Smith 19195a8e39fbSHong Zhang /* copy data into free_space and free_space_lvl, then initialize lnk */ 19209566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 19215a8e39fbSHong Zhang bj_ptr[i] = current_space->array; 19225a8e39fbSHong Zhang bjlvl_ptr[i] = current_space_lvl->array; 19235a8e39fbSHong Zhang 19245a8e39fbSHong Zhang /* make sure the active row i has diagonal entry */ 192508401ef6SPierre Jolivet PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 19265a8e39fbSHong Zhang 19275a8e39fbSHong Zhang current_space->array += nzi; 19285a8e39fbSHong Zhang current_space->local_used += nzi; 19295a8e39fbSHong Zhang current_space->local_remaining -= nzi; 19305a8e39fbSHong Zhang current_space_lvl->array += nzi; 19315a8e39fbSHong Zhang current_space_lvl->local_used += nzi; 19325a8e39fbSHong Zhang current_space_lvl->local_remaining -= nzi; 19339e25ed09SBarry Smith } 19345a8e39fbSHong Zhang 19359566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 19369566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 19375a8e39fbSHong Zhang 19385a8e39fbSHong Zhang /* destroy list of free space and other temporary arrays */ 19399566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 19409566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */ 19419566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 19429566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 19439566063dSJacob Faibussowitsch PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 19449e25ed09SBarry Smith 19456cf91177SBarry Smith #if defined(PETSC_USE_INFO) 1946f64065bbSBarry Smith { 19475a8e39fbSHong Zhang PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 19489566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 19499566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 19509566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 19519566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 1952*48a46eb9SPierre Jolivet if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 1953f64065bbSBarry Smith } 195463ba0a88SBarry Smith #endif 1955bbb6d6a8SBarry Smith 19569e25ed09SBarry Smith /* put together the new matrix */ 19579566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 19589566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)fact, (PetscObject)isicol)); 1959719d5645SBarry Smith b = (Mat_SeqAIJ *)(fact)->data; 19602205254eSKarl Rupp 1961e6b907acSBarry Smith b->free_a = PETSC_TRUE; 1962e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 19637c922b88SBarry Smith b->singlemalloc = PETSC_FALSE; 19642205254eSKarl Rupp 19659566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[n], &b->a)); 19665a8e39fbSHong Zhang b->j = bj; 19675a8e39fbSHong Zhang b->i = bi; 19685a8e39fbSHong Zhang for (i = 0; i < n; i++) bdiag[i] += bi[i]; 19695a8e39fbSHong Zhang b->diag = bdiag; 1970f4259b30SLisandro Dalcin b->ilen = NULL; 1971f4259b30SLisandro Dalcin b->imax = NULL; 1972416022c9SBarry Smith b->row = isrow; 1973416022c9SBarry Smith b->col = iscol; 19749566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 19759566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 197682bf6240SBarry Smith b->icol = isicol; 19779566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 1978416022c9SBarry Smith /* In b structure: Free imax, ilen, old a, old j. 19795a8e39fbSHong Zhang Allocate bdiag, solve_work, new a, new j */ 19809566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)fact, (bi[n] - n) * (sizeof(PetscInt) + sizeof(PetscScalar)))); 19815a8e39fbSHong Zhang b->maxnz = b->nz = bi[n]; 19822205254eSKarl Rupp 1983719d5645SBarry Smith (fact)->info.factor_mallocs = reallocs; 1984719d5645SBarry Smith (fact)->info.fill_ratio_given = f; 1985719d5645SBarry Smith (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 1986ad04f41aSHong Zhang (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 19879371c9d4SSatish Balay if (a->inode.size) { (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; } 19883a40ed3dSBarry Smith PetscFunctionReturn(0); 19899e25ed09SBarry Smith } 1990d5d45c9bSBarry Smith 19919371c9d4SSatish Balay PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) { 19925f44c854SHong Zhang Mat C = B; 19935f44c854SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 19945f44c854SHong Zhang Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 19955f44c854SHong Zhang IS ip = b->row, iip = b->icol; 19965f44c854SHong Zhang const PetscInt *rip, *riip; 19975f44c854SHong Zhang PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp; 19985f44c854SHong Zhang PetscInt *ai = a->i, *aj = a->j; 19995f44c854SHong Zhang PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz; 20005f44c854SHong Zhang MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 2001ace3abfcSBarry Smith PetscBool perm_identity; 2002d90ac83dSHong Zhang FactorShiftCtx sctx; 200398a93161SHong Zhang PetscReal rs; 200498a93161SHong Zhang MatScalar d, *v; 200598a93161SHong Zhang 20065f44c854SHong Zhang PetscFunctionBegin; 200798a93161SHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 20089566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 200998a93161SHong Zhang 2010f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 201198a93161SHong Zhang sctx.shift_top = info->zeropivot; 201298a93161SHong Zhang for (i = 0; i < mbs; i++) { 201398a93161SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 201498a93161SHong Zhang d = (aa)[a->diag[i]]; 201598a93161SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 201698a93161SHong Zhang v = aa + ai[i]; 201798a93161SHong Zhang nz = ai[i + 1] - ai[i]; 20182205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 201998a93161SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 202098a93161SHong Zhang } 202198a93161SHong Zhang sctx.shift_top *= 1.1; 202298a93161SHong Zhang sctx.nshift_max = 5; 202398a93161SHong Zhang sctx.shift_lo = 0.; 202498a93161SHong Zhang sctx.shift_hi = 1.; 202598a93161SHong Zhang } 20265f44c854SHong Zhang 20279566063dSJacob Faibussowitsch PetscCall(ISGetIndices(ip, &rip)); 20289566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iip, &riip)); 20295f44c854SHong Zhang 20305f44c854SHong Zhang /* allocate working arrays 20315f44c854SHong Zhang c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 20325f44c854SHong Zhang il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 20335f44c854SHong Zhang */ 20349566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r)); 20355f44c854SHong Zhang 203698a93161SHong Zhang do { 203707b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 203898a93161SHong Zhang 2039c5546cabSHong Zhang for (i = 0; i < mbs; i++) c2r[i] = mbs; 20402e987568SSatish Balay if (mbs) il[0] = 0; 20415f44c854SHong Zhang 20425f44c854SHong Zhang for (k = 0; k < mbs; k++) { 20435f44c854SHong Zhang /* zero rtmp */ 20445f44c854SHong Zhang nz = bi[k + 1] - bi[k]; 20455f44c854SHong Zhang bjtmp = bj + bi[k]; 20465f44c854SHong Zhang for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 20475f44c854SHong Zhang 20485f44c854SHong Zhang /* load in initial unfactored row */ 20495f44c854SHong Zhang bval = ba + bi[k]; 20509371c9d4SSatish Balay jmin = ai[rip[k]]; 20519371c9d4SSatish Balay jmax = ai[rip[k] + 1]; 20525f44c854SHong Zhang for (j = jmin; j < jmax; j++) { 20535f44c854SHong Zhang col = riip[aj[j]]; 20545f44c854SHong Zhang if (col >= k) { /* only take upper triangular entry */ 20555f44c854SHong Zhang rtmp[col] = aa[j]; 20565f44c854SHong Zhang *bval++ = 0.0; /* for in-place factorization */ 20575f44c854SHong Zhang } 20585f44c854SHong Zhang } 205998a93161SHong Zhang /* shift the diagonal of the matrix: ZeropivotApply() */ 206098a93161SHong Zhang rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 20615f44c854SHong Zhang 20625f44c854SHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 20635f44c854SHong Zhang dk = rtmp[k]; 20645f44c854SHong Zhang i = c2r[k]; /* first row to be added to k_th row */ 20655f44c854SHong Zhang 20665f44c854SHong Zhang while (i < k) { 20675f44c854SHong Zhang nexti = c2r[i]; /* next row to be added to k_th row */ 20685f44c854SHong Zhang 20695f44c854SHong Zhang /* compute multiplier, update diag(k) and U(i,k) */ 20705f44c854SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 20715f44c854SHong Zhang uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */ 20725f44c854SHong Zhang dk += uikdi * ba[ili]; /* update diag[k] */ 20735f44c854SHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 20745f44c854SHong Zhang 20755f44c854SHong Zhang /* add multiple of row i to k-th row */ 20769371c9d4SSatish Balay jmin = ili + 1; 20779371c9d4SSatish Balay jmax = bi[i + 1]; 20785f44c854SHong Zhang if (jmin < jmax) { 20795f44c854SHong Zhang for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 20805f44c854SHong Zhang /* update il and c2r for row i */ 20815f44c854SHong Zhang il[i] = jmin; 20829371c9d4SSatish Balay j = bj[jmin]; 20839371c9d4SSatish Balay c2r[i] = c2r[j]; 20849371c9d4SSatish Balay c2r[j] = i; 20855f44c854SHong Zhang } 20865f44c854SHong Zhang i = nexti; 20875f44c854SHong Zhang } 20885f44c854SHong Zhang 20895f44c854SHong Zhang /* copy data into U(k,:) */ 209098a93161SHong Zhang rs = 0.0; 20919371c9d4SSatish Balay jmin = bi[k]; 20929371c9d4SSatish Balay jmax = bi[k + 1] - 1; 20935f44c854SHong Zhang if (jmin < jmax) { 20945f44c854SHong Zhang for (j = jmin; j < jmax; j++) { 20959371c9d4SSatish Balay col = bj[j]; 20969371c9d4SSatish Balay ba[j] = rtmp[col]; 20979371c9d4SSatish Balay rs += PetscAbsScalar(ba[j]); 20985f44c854SHong Zhang } 20995f44c854SHong Zhang /* add the k-th row into il and c2r */ 21005f44c854SHong Zhang il[k] = jmin; 21019371c9d4SSatish Balay i = bj[jmin]; 21029371c9d4SSatish Balay c2r[k] = c2r[i]; 21039371c9d4SSatish Balay c2r[i] = k; 21045f44c854SHong Zhang } 210598a93161SHong Zhang 210698a93161SHong Zhang /* MatPivotCheck() */ 210798a93161SHong Zhang sctx.rs = rs; 210898a93161SHong Zhang sctx.pv = dk; 21099566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 211007b50cabSHong Zhang if (sctx.newshift) break; 211198a93161SHong Zhang dk = sctx.pv; 211298a93161SHong Zhang 211398a93161SHong Zhang ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */ 211498a93161SHong Zhang } 211507b50cabSHong Zhang } while (sctx.newshift); 21165f44c854SHong Zhang 21179566063dSJacob Faibussowitsch PetscCall(PetscFree3(rtmp, il, c2r)); 21189566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(ip, &rip)); 21199566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iip, &riip)); 21205f44c854SHong Zhang 21219566063dSJacob Faibussowitsch PetscCall(ISIdentity(ip, &perm_identity)); 21225f44c854SHong Zhang if (perm_identity) { 212335233d99SShri Abhyankar B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 212435233d99SShri Abhyankar B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 21252169352eSHong Zhang B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 21262169352eSHong Zhang B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 21275f44c854SHong Zhang } else { 212835233d99SShri Abhyankar B->ops->solve = MatSolve_SeqSBAIJ_1; 212935233d99SShri Abhyankar B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 213035233d99SShri Abhyankar B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 213135233d99SShri Abhyankar B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 21325f44c854SHong Zhang } 21335f44c854SHong Zhang 21345f44c854SHong Zhang C->assembled = PETSC_TRUE; 21355f44c854SHong Zhang C->preallocated = PETSC_TRUE; 21362205254eSKarl Rupp 21379566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->rmap->n)); 213898a93161SHong Zhang 213998a93161SHong Zhang /* MatPivotView() */ 214098a93161SHong Zhang if (sctx.nshift) { 2141f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 21429566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 2143f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 21449566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2145f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 21469566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 214798a93161SHong Zhang } 214898a93161SHong Zhang } 21495f44c854SHong Zhang PetscFunctionReturn(0); 21505f44c854SHong Zhang } 21515f44c854SHong Zhang 21529371c9d4SSatish Balay PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) { 2153719d5645SBarry Smith Mat C = B; 21540968510aSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 21552ed133dbSHong Zhang Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 21569bfd6278SHong Zhang IS ip = b->row, iip = b->icol; 21575d0c19d7SBarry Smith const PetscInt *rip, *riip; 2158c5546cabSHong Zhang PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp; 21592ed133dbSHong Zhang PetscInt *ai = a->i, *aj = a->j; 21602ed133dbSHong Zhang PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 2161622e2028SHong Zhang MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 2162ace3abfcSBarry Smith PetscBool perm_identity; 21630e95ead3SHong Zhang FactorShiftCtx sctx; 21640e95ead3SHong Zhang PetscReal rs; 21650e95ead3SHong Zhang MatScalar d, *v; 2166d5d45c9bSBarry Smith 2167a6175056SHong Zhang PetscFunctionBegin; 21680e95ead3SHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 21699566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 21700e95ead3SHong Zhang 21710e95ead3SHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 21720e95ead3SHong Zhang sctx.shift_top = info->zeropivot; 21730e95ead3SHong Zhang for (i = 0; i < mbs; i++) { 21740e95ead3SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 21750e95ead3SHong Zhang d = (aa)[a->diag[i]]; 21760e95ead3SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 21770e95ead3SHong Zhang v = aa + ai[i]; 21780e95ead3SHong Zhang nz = ai[i + 1] - ai[i]; 21792205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 21800e95ead3SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 21810e95ead3SHong Zhang } 21820e95ead3SHong Zhang sctx.shift_top *= 1.1; 21830e95ead3SHong Zhang sctx.nshift_max = 5; 21840e95ead3SHong Zhang sctx.shift_lo = 0.; 21850e95ead3SHong Zhang sctx.shift_hi = 1.; 21860e95ead3SHong Zhang } 2187ee45ca4aSHong Zhang 21889566063dSJacob Faibussowitsch PetscCall(ISGetIndices(ip, &rip)); 21899566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iip, &riip)); 21902ed133dbSHong Zhang 21912ed133dbSHong Zhang /* initialization */ 21929566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 21930e95ead3SHong Zhang 21942ed133dbSHong Zhang do { 219507b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 21960e95ead3SHong Zhang 2197c5546cabSHong Zhang for (i = 0; i < mbs; i++) jl[i] = mbs; 2198c5546cabSHong Zhang il[0] = 0; 21992ed133dbSHong Zhang 22002ed133dbSHong Zhang for (k = 0; k < mbs; k++) { 2201c5546cabSHong Zhang /* zero rtmp */ 2202c5546cabSHong Zhang nz = bi[k + 1] - bi[k]; 2203c5546cabSHong Zhang bjtmp = bj + bi[k]; 2204c5546cabSHong Zhang for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 2205c5546cabSHong Zhang 2206c05c3958SHong Zhang bval = ba + bi[k]; 22072ed133dbSHong Zhang /* initialize k-th row by the perm[k]-th row of A */ 22089371c9d4SSatish Balay jmin = ai[rip[k]]; 22099371c9d4SSatish Balay jmax = ai[rip[k] + 1]; 22102ed133dbSHong Zhang for (j = jmin; j < jmax; j++) { 22119bfd6278SHong Zhang col = riip[aj[j]]; 22122ed133dbSHong Zhang if (col >= k) { /* only take upper triangular entry */ 22132ed133dbSHong Zhang rtmp[col] = aa[j]; 2214c05c3958SHong Zhang *bval++ = 0.0; /* for in-place factorization */ 22152ed133dbSHong Zhang } 22162ed133dbSHong Zhang } 221739e3d630SHong Zhang /* shift the diagonal of the matrix */ 2218540703acSHong Zhang if (sctx.nshift) rtmp[k] += sctx.shift_amount; 22192ed133dbSHong Zhang 22202ed133dbSHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 22212ed133dbSHong Zhang dk = rtmp[k]; 22222ed133dbSHong Zhang i = jl[k]; /* first row to be added to k_th row */ 22232ed133dbSHong Zhang 22242ed133dbSHong Zhang while (i < k) { 22252ed133dbSHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 22262ed133dbSHong Zhang 22272ed133dbSHong Zhang /* compute multiplier, update diag(k) and U(i,k) */ 22282ed133dbSHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 22292ed133dbSHong Zhang uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 22302ed133dbSHong Zhang dk += uikdi * ba[ili]; 22312ed133dbSHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 22322ed133dbSHong Zhang 22332ed133dbSHong Zhang /* add multiple of row i to k-th row */ 22349371c9d4SSatish Balay jmin = ili + 1; 22359371c9d4SSatish Balay jmax = bi[i + 1]; 22362ed133dbSHong Zhang if (jmin < jmax) { 22372ed133dbSHong Zhang for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 22382ed133dbSHong Zhang /* update il and jl for row i */ 22392ed133dbSHong Zhang il[i] = jmin; 22409371c9d4SSatish Balay j = bj[jmin]; 22419371c9d4SSatish Balay jl[i] = jl[j]; 22429371c9d4SSatish Balay jl[j] = i; 22432ed133dbSHong Zhang } 22442ed133dbSHong Zhang i = nexti; 22452ed133dbSHong Zhang } 22462ed133dbSHong Zhang 22470697b6caSHong Zhang /* shift the diagonals when zero pivot is detected */ 22480697b6caSHong Zhang /* compute rs=sum of abs(off-diagonal) */ 22490697b6caSHong Zhang rs = 0.0; 22503c9e8598SHong Zhang jmin = bi[k] + 1; 22513c9e8598SHong Zhang nz = bi[k + 1] - jmin; 22523c9e8598SHong Zhang bcol = bj + jmin; 22539371c9d4SSatish Balay for (j = 0; j < nz; j++) { rs += PetscAbsScalar(rtmp[bcol[j]]); } 2254540703acSHong Zhang 2255540703acSHong Zhang sctx.rs = rs; 2256540703acSHong Zhang sctx.pv = dk; 22579566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, k)); 225807b50cabSHong Zhang if (sctx.newshift) break; 22590e95ead3SHong Zhang dk = sctx.pv; 22603c9e8598SHong Zhang 22613c9e8598SHong Zhang /* copy data into U(k,:) */ 226239e3d630SHong Zhang ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 22639371c9d4SSatish Balay jmin = bi[k] + 1; 22649371c9d4SSatish Balay jmax = bi[k + 1]; 226539e3d630SHong Zhang if (jmin < jmax) { 226639e3d630SHong Zhang for (j = jmin; j < jmax; j++) { 22679371c9d4SSatish Balay col = bj[j]; 22689371c9d4SSatish Balay ba[j] = rtmp[col]; 22693c9e8598SHong Zhang } 227039e3d630SHong Zhang /* add the k-th row into il and jl */ 22713c9e8598SHong Zhang il[k] = jmin; 22729371c9d4SSatish Balay i = bj[jmin]; 22739371c9d4SSatish Balay jl[k] = jl[i]; 22749371c9d4SSatish Balay jl[i] = k; 22753c9e8598SHong Zhang } 22763c9e8598SHong Zhang } 227707b50cabSHong Zhang } while (sctx.newshift); 22780e95ead3SHong Zhang 22799566063dSJacob Faibussowitsch PetscCall(PetscFree3(rtmp, il, jl)); 22809566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(ip, &rip)); 22819566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iip, &riip)); 2282db4efbfdSBarry Smith 22839566063dSJacob Faibussowitsch PetscCall(ISIdentity(ip, &perm_identity)); 2284db4efbfdSBarry Smith if (perm_identity) { 22850a3c351aSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 22860a3c351aSHong Zhang B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 22870a3c351aSHong Zhang B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 22880a3c351aSHong Zhang B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2289db4efbfdSBarry Smith } else { 22900a3c351aSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 22910a3c351aSHong Zhang B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 22920a3c351aSHong Zhang B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 22930a3c351aSHong Zhang B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2294db4efbfdSBarry Smith } 2295db4efbfdSBarry Smith 22963c9e8598SHong Zhang C->assembled = PETSC_TRUE; 22973c9e8598SHong Zhang C->preallocated = PETSC_TRUE; 22982205254eSKarl Rupp 22999566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->rmap->n)); 2300540703acSHong Zhang if (sctx.nshift) { 2301f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 23029566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2303f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 23049566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 23050697b6caSHong Zhang } 23063c9e8598SHong Zhang } 23073c9e8598SHong Zhang PetscFunctionReturn(0); 23083c9e8598SHong Zhang } 2309a6175056SHong Zhang 23108a76ed4fSHong Zhang /* 23118a76ed4fSHong Zhang icc() under revised new data structure. 23128a76ed4fSHong Zhang Factored arrays bj and ba are stored as 23138a76ed4fSHong Zhang U(0,:),...,U(i,:),U(n-1,:) 23148a76ed4fSHong Zhang 23158a76ed4fSHong Zhang ui=fact->i is an array of size n+1, in which 23168a76ed4fSHong Zhang ui+ 23178a76ed4fSHong Zhang ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 23188a76ed4fSHong Zhang ui[n]: points to U(n-1,n-1)+1 23198a76ed4fSHong Zhang 23208a76ed4fSHong Zhang udiag=fact->diag is an array of size n,in which 23218a76ed4fSHong Zhang udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 23228a76ed4fSHong Zhang 23238a76ed4fSHong Zhang U(i,:) contains udiag[i] as its last entry, i.e., 23248a76ed4fSHong Zhang U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 23258a76ed4fSHong Zhang */ 23268a76ed4fSHong Zhang 23279371c9d4SSatish Balay PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) { 23280968510aSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2329ed59401aSHong Zhang Mat_SeqSBAIJ *b; 2330ace3abfcSBarry Smith PetscBool perm_identity, missing; 23315f44c854SHong Zhang PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 23325d0c19d7SBarry Smith const PetscInt *rip, *riip; 2333ed59401aSHong Zhang PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 23340298fd71SBarry Smith PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 23355a8e39fbSHong Zhang PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2336ed59401aSHong Zhang PetscReal fill = info->fill, levels = info->levels; 23370298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 23380298fd71SBarry Smith PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 23397a48dd6fSHong Zhang PetscBT lnkbt; 2340b635d36bSHong Zhang IS iperm; 2341a6175056SHong Zhang 2342a6175056SHong Zhang PetscFunctionBegin; 234308401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 23449566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &d)); 234528b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 23469566063dSJacob Faibussowitsch PetscCall(ISIdentity(perm, &perm_identity)); 23479566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 23487a48dd6fSHong Zhang 23499566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ui)); 23509566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &udiag)); 235139e3d630SHong Zhang ui[0] = 0; 235239e3d630SHong Zhang 23538a76ed4fSHong Zhang /* ICC(0) without matrix ordering: simply rearrange column indices */ 2354622e2028SHong Zhang if (!levels && perm_identity) { 23555f44c854SHong Zhang for (i = 0; i < am; i++) { 2356c5546cabSHong Zhang ncols = ai[i + 1] - a->diag[i]; 2357c5546cabSHong Zhang ui[i + 1] = ui[i] + ncols; 2358c5546cabSHong Zhang udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */ 2359dc1e2a3fSHong Zhang } 23609566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2361dc1e2a3fSHong Zhang cols = uj; 2362dc1e2a3fSHong Zhang for (i = 0; i < am; i++) { 23635f44c854SHong Zhang aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2364dc1e2a3fSHong Zhang ncols = ai[i + 1] - a->diag[i] - 1; 23655f44c854SHong Zhang for (j = 0; j < ncols; j++) *cols++ = aj[j]; 2366910cf402Sprj- *cols++ = i; /* diagonal is located as the last entry of U(i,:) */ 23675f44c854SHong Zhang } 23685f44c854SHong Zhang } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 23699566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iperm, &riip)); 23709566063dSJacob Faibussowitsch PetscCall(ISGetIndices(perm, &rip)); 23715f44c854SHong Zhang 23725f44c854SHong Zhang /* initialization */ 23739566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ajtmp)); 23745f44c854SHong Zhang 23755f44c854SHong Zhang /* jl: linked list for storing indices of the pivot rows 23765f44c854SHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 23779566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 23785f44c854SHong Zhang for (i = 0; i < am; i++) { 23799371c9d4SSatish Balay jl[i] = am; 23809371c9d4SSatish Balay il[i] = 0; 23815f44c854SHong Zhang } 23825f44c854SHong Zhang 23835f44c854SHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 23845f44c854SHong Zhang nlnk = am + 1; 23859566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 23865f44c854SHong Zhang 238795b5ac22SHong Zhang /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 23889566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 23895f44c854SHong Zhang current_space = free_space; 23909566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl)); 23915f44c854SHong Zhang current_space_lvl = free_space_lvl; 23925f44c854SHong Zhang 23935f44c854SHong Zhang for (k = 0; k < am; k++) { /* for each active row k */ 23945f44c854SHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 23955f44c854SHong Zhang nzk = 0; 23965f44c854SHong Zhang ncols = ai[rip[k] + 1] - ai[rip[k]]; 239728b400f6SJacob Faibussowitsch PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 23985f44c854SHong Zhang ncols_upper = 0; 23995f44c854SHong Zhang for (j = 0; j < ncols; j++) { 24005f44c854SHong Zhang i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 24015f44c854SHong Zhang if (riip[i] >= k) { /* only take upper triangular entry */ 24025f44c854SHong Zhang ajtmp[ncols_upper] = i; 24035f44c854SHong Zhang ncols_upper++; 24045f44c854SHong Zhang } 24055f44c854SHong Zhang } 24069566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 24075f44c854SHong Zhang nzk += nlnk; 24085f44c854SHong Zhang 24095f44c854SHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 24105f44c854SHong Zhang prow = jl[k]; /* 1st pivot row */ 24115f44c854SHong Zhang 24125f44c854SHong Zhang while (prow < k) { 24135f44c854SHong Zhang nextprow = jl[prow]; 24145f44c854SHong Zhang 24155f44c854SHong Zhang /* merge prow into k-th row */ 24165f44c854SHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 24175f44c854SHong Zhang jmax = ui[prow + 1]; 24185f44c854SHong Zhang ncols = jmax - jmin; 24195f44c854SHong Zhang i = jmin - ui[prow]; 24205f44c854SHong Zhang cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 24215f44c854SHong Zhang uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 24225f44c854SHong Zhang j = *(uj - 1); 24239566063dSJacob Faibussowitsch PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 24245f44c854SHong Zhang nzk += nlnk; 24255f44c854SHong Zhang 24265f44c854SHong Zhang /* update il and jl for prow */ 24275f44c854SHong Zhang if (jmin < jmax) { 24285f44c854SHong Zhang il[prow] = jmin; 24299371c9d4SSatish Balay j = *cols; 24309371c9d4SSatish Balay jl[prow] = jl[j]; 24319371c9d4SSatish Balay jl[j] = prow; 24325f44c854SHong Zhang } 24335f44c854SHong Zhang prow = nextprow; 24345f44c854SHong Zhang } 24355f44c854SHong Zhang 24365f44c854SHong Zhang /* if free space is not available, make more free space */ 24375f44c854SHong Zhang if (current_space->local_remaining < nzk) { 24385f44c854SHong Zhang i = am - k + 1; /* num of unfactored rows */ 2439f91af8c7SBarry Smith i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 24409566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 24419566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 24425f44c854SHong Zhang reallocs++; 24435f44c854SHong Zhang } 24445f44c854SHong Zhang 24455f44c854SHong Zhang /* copy data into free_space and free_space_lvl, then initialize lnk */ 244608401ef6SPierre Jolivet PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 24479566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 24485f44c854SHong Zhang 24495f44c854SHong Zhang /* add the k-th row into il and jl */ 24505f44c854SHong Zhang if (nzk > 1) { 24515f44c854SHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 24529371c9d4SSatish Balay jl[k] = jl[i]; 24539371c9d4SSatish Balay jl[i] = k; 24545f44c854SHong Zhang il[k] = ui[k] + 1; 24555f44c854SHong Zhang } 24565f44c854SHong Zhang uj_ptr[k] = current_space->array; 24575f44c854SHong Zhang uj_lvl_ptr[k] = current_space_lvl->array; 24585f44c854SHong Zhang 24595f44c854SHong Zhang current_space->array += nzk; 24605f44c854SHong Zhang current_space->local_used += nzk; 24615f44c854SHong Zhang current_space->local_remaining -= nzk; 24625f44c854SHong Zhang 24635f44c854SHong Zhang current_space_lvl->array += nzk; 24645f44c854SHong Zhang current_space_lvl->local_used += nzk; 24655f44c854SHong Zhang current_space_lvl->local_remaining -= nzk; 24665f44c854SHong Zhang 24675f44c854SHong Zhang ui[k + 1] = ui[k] + nzk; 24685f44c854SHong Zhang } 24695f44c854SHong Zhang 24709566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(perm, &rip)); 24719566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iperm, &riip)); 24729566063dSJacob Faibussowitsch PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 24739566063dSJacob Faibussowitsch PetscCall(PetscFree(ajtmp)); 24745f44c854SHong Zhang 24759263d837SHong Zhang /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 24769566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 24779566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 24789566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 24799566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 24805f44c854SHong Zhang 24815f44c854SHong Zhang } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 24825f44c854SHong Zhang 24835f44c854SHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 24845f44c854SHong Zhang b = (Mat_SeqSBAIJ *)(fact)->data; 24855f44c854SHong Zhang b->singlemalloc = PETSC_FALSE; 24862205254eSKarl Rupp 24879566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 24882205254eSKarl Rupp 24895f44c854SHong Zhang b->j = uj; 24905f44c854SHong Zhang b->i = ui; 24915f44c854SHong Zhang b->diag = udiag; 24927f53bb6cSHong Zhang b->free_diag = PETSC_TRUE; 2493f4259b30SLisandro Dalcin b->ilen = NULL; 2494f4259b30SLisandro Dalcin b->imax = NULL; 24955f44c854SHong Zhang b->row = perm; 24965f44c854SHong Zhang b->col = perm; 24979566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 24989566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 24995f44c854SHong Zhang b->icol = iperm; 25005f44c854SHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 25012205254eSKarl Rupp 25029566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 25039566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)fact, ui[am] * (sizeof(PetscInt) + sizeof(MatScalar)))); 25042205254eSKarl Rupp 25055f44c854SHong Zhang b->maxnz = b->nz = ui[am]; 25065f44c854SHong Zhang b->free_a = PETSC_TRUE; 25075f44c854SHong Zhang b->free_ij = PETSC_TRUE; 25085f44c854SHong Zhang 2509f284f12aSHong Zhang fact->info.factor_mallocs = reallocs; 2510f284f12aSHong Zhang fact->info.fill_ratio_given = fill; 25115f44c854SHong Zhang if (ai[am] != 0) { 25126a69fef8SHong Zhang /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 251395b5ac22SHong Zhang fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 25145f44c854SHong Zhang } else { 2515f284f12aSHong Zhang fact->info.fill_ratio_needed = 0.0; 25165f44c854SHong Zhang } 25179263d837SHong Zhang #if defined(PETSC_USE_INFO) 25189263d837SHong Zhang if (ai[am] != 0) { 25199263d837SHong Zhang PetscReal af = fact->info.fill_ratio_needed; 25209566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 25219566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 25229566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 25239263d837SHong Zhang } else { 25249566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 25259263d837SHong Zhang } 25269263d837SHong Zhang #endif 252735233d99SShri Abhyankar fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 25285f44c854SHong Zhang PetscFunctionReturn(0); 25295f44c854SHong Zhang } 25305f44c854SHong Zhang 25319371c9d4SSatish Balay PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) { 25325f44c854SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 25335f44c854SHong Zhang Mat_SeqSBAIJ *b; 2534ace3abfcSBarry Smith PetscBool perm_identity, missing; 25355f44c854SHong Zhang PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 25365f44c854SHong Zhang const PetscInt *rip, *riip; 25375f44c854SHong Zhang PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 25380298fd71SBarry Smith PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 25395f44c854SHong Zhang PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 25405f44c854SHong Zhang PetscReal fill = info->fill, levels = info->levels; 25410298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 25420298fd71SBarry Smith PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 25435f44c854SHong Zhang PetscBT lnkbt; 25445f44c854SHong Zhang IS iperm; 25455f44c854SHong Zhang 25465f44c854SHong Zhang PetscFunctionBegin; 254708401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 25489566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &d)); 254928b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 25509566063dSJacob Faibussowitsch PetscCall(ISIdentity(perm, &perm_identity)); 25519566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 25525f44c854SHong Zhang 25539566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ui)); 25549566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &udiag)); 25555f44c854SHong Zhang ui[0] = 0; 25565f44c854SHong Zhang 25575f44c854SHong Zhang /* ICC(0) without matrix ordering: simply copies fill pattern */ 25585f44c854SHong Zhang if (!levels && perm_identity) { 25595f44c854SHong Zhang for (i = 0; i < am; i++) { 25605f44c854SHong Zhang ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i]; 25615f44c854SHong Zhang udiag[i] = ui[i]; 2562ed59401aSHong Zhang } 25639566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 256439e3d630SHong Zhang cols = uj; 2565ed59401aSHong Zhang for (i = 0; i < am; i++) { 2566ed59401aSHong Zhang aj = a->j + a->diag[i]; 256739e3d630SHong Zhang ncols = ui[i + 1] - ui[i]; 256839e3d630SHong Zhang for (j = 0; j < ncols; j++) *cols++ = *aj++; 2569ed59401aSHong Zhang } 257039e3d630SHong Zhang } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 25719566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iperm, &riip)); 25729566063dSJacob Faibussowitsch PetscCall(ISGetIndices(perm, &rip)); 2573b635d36bSHong Zhang 25747a48dd6fSHong Zhang /* initialization */ 25759566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ajtmp)); 25767a48dd6fSHong Zhang 25777a48dd6fSHong Zhang /* jl: linked list for storing indices of the pivot rows 25787a48dd6fSHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 25799566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 25807a48dd6fSHong Zhang for (i = 0; i < am; i++) { 25819371c9d4SSatish Balay jl[i] = am; 25829371c9d4SSatish Balay il[i] = 0; 25837a48dd6fSHong Zhang } 25847a48dd6fSHong Zhang 25857a48dd6fSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 25867a48dd6fSHong Zhang nlnk = am + 1; 25879566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 25887a48dd6fSHong Zhang 25897a48dd6fSHong Zhang /* initial FreeSpace size is fill*(ai[am]+1) */ 25909566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 25917a48dd6fSHong Zhang current_space = free_space; 25929566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl)); 25937a48dd6fSHong Zhang current_space_lvl = free_space_lvl; 25947a48dd6fSHong Zhang 25957a48dd6fSHong Zhang for (k = 0; k < am; k++) { /* for each active row k */ 25967a48dd6fSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 25977a48dd6fSHong Zhang nzk = 0; 2598622e2028SHong Zhang ncols = ai[rip[k] + 1] - ai[rip[k]]; 259928b400f6SJacob Faibussowitsch PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2600622e2028SHong Zhang ncols_upper = 0; 2601622e2028SHong Zhang for (j = 0; j < ncols; j++) { 2602b635d36bSHong Zhang i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2603b635d36bSHong Zhang if (riip[i] >= k) { /* only take upper triangular entry */ 26045a8e39fbSHong Zhang ajtmp[ncols_upper] = i; 2605622e2028SHong Zhang ncols_upper++; 2606622e2028SHong Zhang } 2607622e2028SHong Zhang } 26089566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 26097a48dd6fSHong Zhang nzk += nlnk; 26107a48dd6fSHong Zhang 26117a48dd6fSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 26127a48dd6fSHong Zhang prow = jl[k]; /* 1st pivot row */ 26137a48dd6fSHong Zhang 26147a48dd6fSHong Zhang while (prow < k) { 26157a48dd6fSHong Zhang nextprow = jl[prow]; 26167a48dd6fSHong Zhang 26177a48dd6fSHong Zhang /* merge prow into k-th row */ 26187a48dd6fSHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 26197a48dd6fSHong Zhang jmax = ui[prow + 1]; 26207a48dd6fSHong Zhang ncols = jmax - jmin; 2621ed59401aSHong Zhang i = jmin - ui[prow]; 2622ed59401aSHong Zhang cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 26235a8e39fbSHong Zhang uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 26245a8e39fbSHong Zhang j = *(uj - 1); 26259566063dSJacob Faibussowitsch PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 26267a48dd6fSHong Zhang nzk += nlnk; 26277a48dd6fSHong Zhang 26287a48dd6fSHong Zhang /* update il and jl for prow */ 26297a48dd6fSHong Zhang if (jmin < jmax) { 26307a48dd6fSHong Zhang il[prow] = jmin; 26319371c9d4SSatish Balay j = *cols; 26329371c9d4SSatish Balay jl[prow] = jl[j]; 26339371c9d4SSatish Balay jl[j] = prow; 26347a48dd6fSHong Zhang } 26357a48dd6fSHong Zhang prow = nextprow; 26367a48dd6fSHong Zhang } 26377a48dd6fSHong Zhang 26387a48dd6fSHong Zhang /* if free space is not available, make more free space */ 26397a48dd6fSHong Zhang if (current_space->local_remaining < nzk) { 26407a48dd6fSHong Zhang i = am - k + 1; /* num of unfactored rows */ 2641f91af8c7SBarry Smith i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 26429566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 26439566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 26447a48dd6fSHong Zhang reallocs++; 26457a48dd6fSHong Zhang } 26467a48dd6fSHong Zhang 26477a48dd6fSHong Zhang /* copy data into free_space and free_space_lvl, then initialize lnk */ 264828b400f6SJacob Faibussowitsch PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 26499566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 26507a48dd6fSHong Zhang 26517a48dd6fSHong Zhang /* add the k-th row into il and jl */ 265239e3d630SHong Zhang if (nzk > 1) { 26537a48dd6fSHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 26549371c9d4SSatish Balay jl[k] = jl[i]; 26559371c9d4SSatish Balay jl[i] = k; 26567a48dd6fSHong Zhang il[k] = ui[k] + 1; 26577a48dd6fSHong Zhang } 26587a48dd6fSHong Zhang uj_ptr[k] = current_space->array; 26597a48dd6fSHong Zhang uj_lvl_ptr[k] = current_space_lvl->array; 26607a48dd6fSHong Zhang 26617a48dd6fSHong Zhang current_space->array += nzk; 26627a48dd6fSHong Zhang current_space->local_used += nzk; 26637a48dd6fSHong Zhang current_space->local_remaining -= nzk; 26647a48dd6fSHong Zhang 26657a48dd6fSHong Zhang current_space_lvl->array += nzk; 26667a48dd6fSHong Zhang current_space_lvl->local_used += nzk; 26677a48dd6fSHong Zhang current_space_lvl->local_remaining -= nzk; 26687a48dd6fSHong Zhang 26697a48dd6fSHong Zhang ui[k + 1] = ui[k] + nzk; 26707a48dd6fSHong Zhang } 26717a48dd6fSHong Zhang 26726cf91177SBarry Smith #if defined(PETSC_USE_INFO) 26737a48dd6fSHong Zhang if (ai[am] != 0) { 267439e3d630SHong Zhang PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]); 26759566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 26769566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 26779566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 26787a48dd6fSHong Zhang } else { 26799566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 26807a48dd6fSHong Zhang } 268163ba0a88SBarry Smith #endif 26827a48dd6fSHong Zhang 26839566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(perm, &rip)); 26849566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iperm, &riip)); 26859566063dSJacob Faibussowitsch PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 26869566063dSJacob Faibussowitsch PetscCall(PetscFree(ajtmp)); 26877a48dd6fSHong Zhang 26887a48dd6fSHong Zhang /* destroy list of free space and other temporary array(s) */ 26899566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 26909566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 26919566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 26929566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 26937a48dd6fSHong Zhang 269439e3d630SHong Zhang } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 269539e3d630SHong Zhang 26967a48dd6fSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 26977a48dd6fSHong Zhang 2698f284f12aSHong Zhang b = (Mat_SeqSBAIJ *)fact->data; 26997a48dd6fSHong Zhang b->singlemalloc = PETSC_FALSE; 27002205254eSKarl Rupp 27019566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 27022205254eSKarl Rupp 27037a48dd6fSHong Zhang b->j = uj; 27047a48dd6fSHong Zhang b->i = ui; 27055f44c854SHong Zhang b->diag = udiag; 27067f53bb6cSHong Zhang b->free_diag = PETSC_TRUE; 2707f4259b30SLisandro Dalcin b->ilen = NULL; 2708f4259b30SLisandro Dalcin b->imax = NULL; 27097a48dd6fSHong Zhang b->row = perm; 2710b635d36bSHong Zhang b->col = perm; 27112205254eSKarl Rupp 27129566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 27139566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 27142205254eSKarl Rupp 2715b635d36bSHong Zhang b->icol = iperm; 27167a48dd6fSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 27179566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 27189566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)fact, (ui[am] - am) * (sizeof(PetscInt) + sizeof(MatScalar)))); 27197a48dd6fSHong Zhang b->maxnz = b->nz = ui[am]; 2720e6b907acSBarry Smith b->free_a = PETSC_TRUE; 2721e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 27227a48dd6fSHong Zhang 2723f284f12aSHong Zhang fact->info.factor_mallocs = reallocs; 2724f284f12aSHong Zhang fact->info.fill_ratio_given = fill; 27257a48dd6fSHong Zhang if (ai[am] != 0) { 2726f284f12aSHong Zhang fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]); 27277a48dd6fSHong Zhang } else { 2728f284f12aSHong Zhang fact->info.fill_ratio_needed = 0.0; 27297a48dd6fSHong Zhang } 27300a3c351aSHong Zhang fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2731a6175056SHong Zhang PetscFunctionReturn(0); 2732a6175056SHong Zhang } 2733d5d45c9bSBarry Smith 27349371c9d4SSatish Balay PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) { 27350be760fbSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 27360be760fbSHong Zhang Mat_SeqSBAIJ *b; 27379186b105SHong Zhang PetscBool perm_identity, missing; 27380be760fbSHong Zhang PetscReal fill = info->fill; 27390be760fbSHong Zhang const PetscInt *rip, *riip; 27400be760fbSHong Zhang PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 27410be760fbSHong Zhang PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 27420be760fbSHong Zhang PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag; 27430298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 27440be760fbSHong Zhang PetscBT lnkbt; 27450be760fbSHong Zhang IS iperm; 27460be760fbSHong Zhang 27470be760fbSHong Zhang PetscFunctionBegin; 274808401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 27499566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 275028b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 27519186b105SHong Zhang 27520be760fbSHong Zhang /* check whether perm is the identity mapping */ 27539566063dSJacob Faibussowitsch PetscCall(ISIdentity(perm, &perm_identity)); 27549566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 27559566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iperm, &riip)); 27569566063dSJacob Faibussowitsch PetscCall(ISGetIndices(perm, &rip)); 27570be760fbSHong Zhang 27580be760fbSHong Zhang /* initialization */ 27599566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ui)); 27609566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &udiag)); 27610be760fbSHong Zhang ui[0] = 0; 27620be760fbSHong Zhang 27630be760fbSHong Zhang /* jl: linked list for storing indices of the pivot rows 27640be760fbSHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 27659566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 27660be760fbSHong Zhang for (i = 0; i < am; i++) { 27679371c9d4SSatish Balay jl[i] = am; 27689371c9d4SSatish Balay il[i] = 0; 27690be760fbSHong Zhang } 27700be760fbSHong Zhang 27710be760fbSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 27720be760fbSHong Zhang nlnk = am + 1; 27739566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 27740be760fbSHong Zhang 277595b5ac22SHong Zhang /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 27769566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 27770be760fbSHong Zhang current_space = free_space; 27780be760fbSHong Zhang 27790be760fbSHong Zhang for (k = 0; k < am; k++) { /* for each active row k */ 27800be760fbSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 27810be760fbSHong Zhang nzk = 0; 27820be760fbSHong Zhang ncols = ai[rip[k] + 1] - ai[rip[k]]; 278328b400f6SJacob Faibussowitsch PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 27840be760fbSHong Zhang ncols_upper = 0; 27850be760fbSHong Zhang for (j = 0; j < ncols; j++) { 27860be760fbSHong Zhang i = riip[*(aj + ai[rip[k]] + j)]; 27870be760fbSHong Zhang if (i >= k) { /* only take upper triangular entry */ 27880be760fbSHong Zhang cols[ncols_upper] = i; 27890be760fbSHong Zhang ncols_upper++; 27900be760fbSHong Zhang } 27910be760fbSHong Zhang } 27929566063dSJacob Faibussowitsch PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 27930be760fbSHong Zhang nzk += nlnk; 27940be760fbSHong Zhang 27950be760fbSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 27960be760fbSHong Zhang prow = jl[k]; /* 1st pivot row */ 27970be760fbSHong Zhang 27980be760fbSHong Zhang while (prow < k) { 27990be760fbSHong Zhang nextprow = jl[prow]; 28000be760fbSHong Zhang /* merge prow into k-th row */ 28010be760fbSHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 28020be760fbSHong Zhang jmax = ui[prow + 1]; 28030be760fbSHong Zhang ncols = jmax - jmin; 28040be760fbSHong Zhang uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 28059566063dSJacob Faibussowitsch PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 28060be760fbSHong Zhang nzk += nlnk; 28070be760fbSHong Zhang 28080be760fbSHong Zhang /* update il and jl for prow */ 28090be760fbSHong Zhang if (jmin < jmax) { 28100be760fbSHong Zhang il[prow] = jmin; 28112205254eSKarl Rupp j = *uj_ptr; 28122205254eSKarl Rupp jl[prow] = jl[j]; 28132205254eSKarl Rupp jl[j] = prow; 28140be760fbSHong Zhang } 28150be760fbSHong Zhang prow = nextprow; 28160be760fbSHong Zhang } 28170be760fbSHong Zhang 28180be760fbSHong Zhang /* if free space is not available, make more free space */ 28190be760fbSHong Zhang if (current_space->local_remaining < nzk) { 28200be760fbSHong Zhang i = am - k + 1; /* num of unfactored rows */ 2821f91af8c7SBarry Smith i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 28229566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 28230be760fbSHong Zhang reallocs++; 28240be760fbSHong Zhang } 28250be760fbSHong Zhang 28260be760fbSHong Zhang /* copy data into free space, then initialize lnk */ 28279566063dSJacob Faibussowitsch PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 28280be760fbSHong Zhang 28290be760fbSHong Zhang /* add the k-th row into il and jl */ 28307b056e98SHong Zhang if (nzk > 1) { 28310be760fbSHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 28329371c9d4SSatish Balay jl[k] = jl[i]; 28339371c9d4SSatish Balay jl[i] = k; 28340be760fbSHong Zhang il[k] = ui[k] + 1; 28350be760fbSHong Zhang } 28360be760fbSHong Zhang ui_ptr[k] = current_space->array; 28372205254eSKarl Rupp 28380be760fbSHong Zhang current_space->array += nzk; 28390be760fbSHong Zhang current_space->local_used += nzk; 28400be760fbSHong Zhang current_space->local_remaining -= nzk; 28410be760fbSHong Zhang 28420be760fbSHong Zhang ui[k + 1] = ui[k] + nzk; 28430be760fbSHong Zhang } 28440be760fbSHong Zhang 28459566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(perm, &rip)); 28469566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iperm, &riip)); 28479566063dSJacob Faibussowitsch PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 28480be760fbSHong Zhang 28499263d837SHong Zhang /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 28509566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 28519566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 28529566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 28530be760fbSHong Zhang 28540be760fbSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 28550be760fbSHong Zhang 2856f284f12aSHong Zhang b = (Mat_SeqSBAIJ *)fact->data; 28570be760fbSHong Zhang b->singlemalloc = PETSC_FALSE; 28580be760fbSHong Zhang b->free_a = PETSC_TRUE; 28590be760fbSHong Zhang b->free_ij = PETSC_TRUE; 28602205254eSKarl Rupp 28619566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 28622205254eSKarl Rupp 28630be760fbSHong Zhang b->j = uj; 28640be760fbSHong Zhang b->i = ui; 28650be760fbSHong Zhang b->diag = udiag; 28660be760fbSHong Zhang b->free_diag = PETSC_TRUE; 2867f4259b30SLisandro Dalcin b->ilen = NULL; 2868f4259b30SLisandro Dalcin b->imax = NULL; 28690be760fbSHong Zhang b->row = perm; 28700be760fbSHong Zhang b->col = perm; 287126fbe8dcSKarl Rupp 28729566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 28739566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 28742205254eSKarl Rupp 28750be760fbSHong Zhang b->icol = iperm; 28760be760fbSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 287726fbe8dcSKarl Rupp 28789566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 28799566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)fact, ui[am] * (sizeof(PetscInt) + sizeof(MatScalar)))); 28802205254eSKarl Rupp 28810be760fbSHong Zhang b->maxnz = b->nz = ui[am]; 28820be760fbSHong Zhang 2883f284f12aSHong Zhang fact->info.factor_mallocs = reallocs; 2884f284f12aSHong Zhang fact->info.fill_ratio_given = fill; 28850be760fbSHong Zhang if (ai[am] != 0) { 28866a69fef8SHong Zhang /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 288795b5ac22SHong Zhang fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 28880be760fbSHong Zhang } else { 2889f284f12aSHong Zhang fact->info.fill_ratio_needed = 0.0; 28900be760fbSHong Zhang } 28919263d837SHong Zhang #if defined(PETSC_USE_INFO) 28929263d837SHong Zhang if (ai[am] != 0) { 28939263d837SHong Zhang PetscReal af = fact->info.fill_ratio_needed; 28949566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 28959566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 28969566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 28979263d837SHong Zhang } else { 28989566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 28999263d837SHong Zhang } 29009263d837SHong Zhang #endif 290135233d99SShri Abhyankar fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 29020be760fbSHong Zhang PetscFunctionReturn(0); 29030be760fbSHong Zhang } 29040be760fbSHong Zhang 29059371c9d4SSatish Balay PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) { 2906f76d2b81SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 290710c27e3dSHong Zhang Mat_SeqSBAIJ *b; 29089186b105SHong Zhang PetscBool perm_identity, missing; 290910c27e3dSHong Zhang PetscReal fill = info->fill; 29105d0c19d7SBarry Smith const PetscInt *rip, *riip; 29115d0c19d7SBarry Smith PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 291210c27e3dSHong Zhang PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 29132ed133dbSHong Zhang PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr; 29140298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 291510c27e3dSHong Zhang PetscBT lnkbt; 29162ed133dbSHong Zhang IS iperm; 2917f76d2b81SHong Zhang 2918f76d2b81SHong Zhang PetscFunctionBegin; 291908401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 29209566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 292128b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 29229186b105SHong Zhang 29232ed133dbSHong Zhang /* check whether perm is the identity mapping */ 29249566063dSJacob Faibussowitsch PetscCall(ISIdentity(perm, &perm_identity)); 29259566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 29269566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iperm, &riip)); 29279566063dSJacob Faibussowitsch PetscCall(ISGetIndices(perm, &rip)); 292810c27e3dSHong Zhang 292910c27e3dSHong Zhang /* initialization */ 29309566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ui)); 293110c27e3dSHong Zhang ui[0] = 0; 293210c27e3dSHong Zhang 293310c27e3dSHong Zhang /* jl: linked list for storing indices of the pivot rows 29341393bc97SHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 29359566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 29361393bc97SHong Zhang for (i = 0; i < am; i++) { 29379371c9d4SSatish Balay jl[i] = am; 29389371c9d4SSatish Balay il[i] = 0; 2939f76d2b81SHong Zhang } 294010c27e3dSHong Zhang 294110c27e3dSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 29421393bc97SHong Zhang nlnk = am + 1; 29439566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 294410c27e3dSHong Zhang 29451393bc97SHong Zhang /* initial FreeSpace size is fill*(ai[am]+1) */ 29469566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 294710c27e3dSHong Zhang current_space = free_space; 294810c27e3dSHong Zhang 29491393bc97SHong Zhang for (k = 0; k < am; k++) { /* for each active row k */ 295010c27e3dSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 295110c27e3dSHong Zhang nzk = 0; 29522ed133dbSHong Zhang ncols = ai[rip[k] + 1] - ai[rip[k]]; 295394bad497SJacob Faibussowitsch PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 29542ed133dbSHong Zhang ncols_upper = 0; 29552ed133dbSHong Zhang for (j = 0; j < ncols; j++) { 29569bfd6278SHong Zhang i = riip[*(aj + ai[rip[k]] + j)]; 29572ed133dbSHong Zhang if (i >= k) { /* only take upper triangular entry */ 29582ed133dbSHong Zhang cols[ncols_upper] = i; 29592ed133dbSHong Zhang ncols_upper++; 29602ed133dbSHong Zhang } 29612ed133dbSHong Zhang } 29629566063dSJacob Faibussowitsch PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 296310c27e3dSHong Zhang nzk += nlnk; 296410c27e3dSHong Zhang 296510c27e3dSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 296610c27e3dSHong Zhang prow = jl[k]; /* 1st pivot row */ 296710c27e3dSHong Zhang 296810c27e3dSHong Zhang while (prow < k) { 296910c27e3dSHong Zhang nextprow = jl[prow]; 297010c27e3dSHong Zhang /* merge prow into k-th row */ 29711393bc97SHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 297210c27e3dSHong Zhang jmax = ui[prow + 1]; 297310c27e3dSHong Zhang ncols = jmax - jmin; 29741393bc97SHong Zhang uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 29759566063dSJacob Faibussowitsch PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 297610c27e3dSHong Zhang nzk += nlnk; 297710c27e3dSHong Zhang 297810c27e3dSHong Zhang /* update il and jl for prow */ 297910c27e3dSHong Zhang if (jmin < jmax) { 298010c27e3dSHong Zhang il[prow] = jmin; 29819371c9d4SSatish Balay j = *uj_ptr; 29829371c9d4SSatish Balay jl[prow] = jl[j]; 29839371c9d4SSatish Balay jl[j] = prow; 298410c27e3dSHong Zhang } 298510c27e3dSHong Zhang prow = nextprow; 298610c27e3dSHong Zhang } 298710c27e3dSHong Zhang 298810c27e3dSHong Zhang /* if free space is not available, make more free space */ 298910c27e3dSHong Zhang if (current_space->local_remaining < nzk) { 29901393bc97SHong Zhang i = am - k + 1; /* num of unfactored rows */ 299110c27e3dSHong Zhang i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 29929566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 299310c27e3dSHong Zhang reallocs++; 299410c27e3dSHong Zhang } 299510c27e3dSHong Zhang 299610c27e3dSHong Zhang /* copy data into free space, then initialize lnk */ 29979566063dSJacob Faibussowitsch PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 299810c27e3dSHong Zhang 299910c27e3dSHong Zhang /* add the k-th row into il and jl */ 300010c27e3dSHong Zhang if (nzk - 1 > 0) { 30011393bc97SHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 30029371c9d4SSatish Balay jl[k] = jl[i]; 30039371c9d4SSatish Balay jl[i] = k; 300410c27e3dSHong Zhang il[k] = ui[k] + 1; 300510c27e3dSHong Zhang } 30062ed133dbSHong Zhang ui_ptr[k] = current_space->array; 30072205254eSKarl Rupp 300810c27e3dSHong Zhang current_space->array += nzk; 300910c27e3dSHong Zhang current_space->local_used += nzk; 301010c27e3dSHong Zhang current_space->local_remaining -= nzk; 301110c27e3dSHong Zhang 301210c27e3dSHong Zhang ui[k + 1] = ui[k] + nzk; 301310c27e3dSHong Zhang } 301410c27e3dSHong Zhang 30156cf91177SBarry Smith #if defined(PETSC_USE_INFO) 30161393bc97SHong Zhang if (ai[am] != 0) { 30171393bc97SHong Zhang PetscReal af = (PetscReal)(ui[am]) / ((PetscReal)ai[am]); 30189566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 30199566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 30209566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 302110c27e3dSHong Zhang } else { 30229566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 302310c27e3dSHong Zhang } 302463ba0a88SBarry Smith #endif 302510c27e3dSHong Zhang 30269566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(perm, &rip)); 30279566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iperm, &riip)); 30289566063dSJacob Faibussowitsch PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 302910c27e3dSHong Zhang 303010c27e3dSHong Zhang /* destroy list of free space and other temporary array(s) */ 30319566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 30329566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 30339566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 303410c27e3dSHong Zhang 303510c27e3dSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 303610c27e3dSHong Zhang 3037f284f12aSHong Zhang b = (Mat_SeqSBAIJ *)fact->data; 303810c27e3dSHong Zhang b->singlemalloc = PETSC_FALSE; 3039e6b907acSBarry Smith b->free_a = PETSC_TRUE; 3040e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 30412205254eSKarl Rupp 30429566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 30432205254eSKarl Rupp 304410c27e3dSHong Zhang b->j = uj; 304510c27e3dSHong Zhang b->i = ui; 3046f4259b30SLisandro Dalcin b->diag = NULL; 3047f4259b30SLisandro Dalcin b->ilen = NULL; 3048f4259b30SLisandro Dalcin b->imax = NULL; 304910c27e3dSHong Zhang b->row = perm; 30509bfd6278SHong Zhang b->col = perm; 30512205254eSKarl Rupp 30529566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 30539566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 30542205254eSKarl Rupp 30559bfd6278SHong Zhang b->icol = iperm; 305610c27e3dSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 30572205254eSKarl Rupp 30589566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 30599566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)fact, (ui[am] - am) * (sizeof(PetscInt) + sizeof(MatScalar)))); 30601393bc97SHong Zhang b->maxnz = b->nz = ui[am]; 306110c27e3dSHong Zhang 3062f284f12aSHong Zhang fact->info.factor_mallocs = reallocs; 3063f284f12aSHong Zhang fact->info.fill_ratio_given = fill; 30641393bc97SHong Zhang if (ai[am] != 0) { 3065f284f12aSHong Zhang fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]); 306610c27e3dSHong Zhang } else { 3067f284f12aSHong Zhang fact->info.fill_ratio_needed = 0.0; 306810c27e3dSHong Zhang } 30690a3c351aSHong Zhang fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3070f76d2b81SHong Zhang PetscFunctionReturn(0); 3071f76d2b81SHong Zhang } 3072599ef60dSHong Zhang 30739371c9d4SSatish Balay PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx) { 3074813a1f2bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3075813a1f2bSShri Abhyankar PetscInt n = A->rmap->n; 3076813a1f2bSShri Abhyankar const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi; 3077813a1f2bSShri Abhyankar PetscScalar *x, sum; 3078813a1f2bSShri Abhyankar const PetscScalar *b; 3079813a1f2bSShri Abhyankar const MatScalar *aa = a->a, *v; 3080813a1f2bSShri Abhyankar PetscInt i, nz; 3081813a1f2bSShri Abhyankar 3082813a1f2bSShri Abhyankar PetscFunctionBegin; 3083813a1f2bSShri Abhyankar if (!n) PetscFunctionReturn(0); 3084813a1f2bSShri Abhyankar 30859566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 30869566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 3087813a1f2bSShri Abhyankar 3088813a1f2bSShri Abhyankar /* forward solve the lower triangular */ 3089813a1f2bSShri Abhyankar x[0] = b[0]; 3090813a1f2bSShri Abhyankar v = aa; 3091813a1f2bSShri Abhyankar vi = aj; 3092813a1f2bSShri Abhyankar for (i = 1; i < n; i++) { 3093813a1f2bSShri Abhyankar nz = ai[i + 1] - ai[i]; 3094813a1f2bSShri Abhyankar sum = b[i]; 3095813a1f2bSShri Abhyankar PetscSparseDenseMinusDot(sum, x, v, vi, nz); 3096813a1f2bSShri Abhyankar v += nz; 3097813a1f2bSShri Abhyankar vi += nz; 3098813a1f2bSShri Abhyankar x[i] = sum; 3099813a1f2bSShri Abhyankar } 3100813a1f2bSShri Abhyankar 3101813a1f2bSShri Abhyankar /* backward solve the upper triangular */ 310262fbd6afSShri Abhyankar for (i = n - 1; i >= 0; i--) { 31033c0119dfSShri Abhyankar v = aa + adiag[i + 1] + 1; 31043c0119dfSShri Abhyankar vi = aj + adiag[i + 1] + 1; 3105813a1f2bSShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 3106813a1f2bSShri Abhyankar sum = x[i]; 3107813a1f2bSShri Abhyankar PetscSparseDenseMinusDot(sum, x, v, vi, nz); 31083c0119dfSShri Abhyankar x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3109813a1f2bSShri Abhyankar } 3110813a1f2bSShri Abhyankar 31119566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 31129566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 31139566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 3114813a1f2bSShri Abhyankar PetscFunctionReturn(0); 3115813a1f2bSShri Abhyankar } 3116813a1f2bSShri Abhyankar 31179371c9d4SSatish Balay PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx) { 3118f268cba8SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3119f268cba8SShri Abhyankar IS iscol = a->col, isrow = a->row; 3120f268cba8SShri Abhyankar PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz; 3121f268cba8SShri Abhyankar const PetscInt *rout, *cout, *r, *c; 3122f268cba8SShri Abhyankar PetscScalar *x, *tmp, sum; 3123f268cba8SShri Abhyankar const PetscScalar *b; 3124f268cba8SShri Abhyankar const MatScalar *aa = a->a, *v; 3125f268cba8SShri Abhyankar 3126f268cba8SShri Abhyankar PetscFunctionBegin; 3127f268cba8SShri Abhyankar if (!n) PetscFunctionReturn(0); 3128f268cba8SShri Abhyankar 31299566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 31309566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 3131f268cba8SShri Abhyankar tmp = a->solve_work; 3132f268cba8SShri Abhyankar 31339371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 31349371c9d4SSatish Balay r = rout; 31359371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 31369371c9d4SSatish Balay c = cout; 3137f268cba8SShri Abhyankar 3138f268cba8SShri Abhyankar /* forward solve the lower triangular */ 3139f268cba8SShri Abhyankar tmp[0] = b[r[0]]; 3140f268cba8SShri Abhyankar v = aa; 3141f268cba8SShri Abhyankar vi = aj; 3142f268cba8SShri Abhyankar for (i = 1; i < n; i++) { 3143f268cba8SShri Abhyankar nz = ai[i + 1] - ai[i]; 3144f268cba8SShri Abhyankar sum = b[r[i]]; 3145f268cba8SShri Abhyankar PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3146f268cba8SShri Abhyankar tmp[i] = sum; 31479371c9d4SSatish Balay v += nz; 31489371c9d4SSatish Balay vi += nz; 3149f268cba8SShri Abhyankar } 3150f268cba8SShri Abhyankar 3151f268cba8SShri Abhyankar /* backward solve the upper triangular */ 3152f268cba8SShri Abhyankar for (i = n - 1; i >= 0; i--) { 31533c0119dfSShri Abhyankar v = aa + adiag[i + 1] + 1; 31543c0119dfSShri Abhyankar vi = aj + adiag[i + 1] + 1; 3155f268cba8SShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 3156f268cba8SShri Abhyankar sum = tmp[i]; 3157f268cba8SShri Abhyankar PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3158f268cba8SShri Abhyankar x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 3159f268cba8SShri Abhyankar } 3160f268cba8SShri Abhyankar 31619566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 31629566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 31639566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 31649566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 31659566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 3166f268cba8SShri Abhyankar PetscFunctionReturn(0); 3167f268cba8SShri Abhyankar } 3168f268cba8SShri Abhyankar 3169fe97e370SBarry Smith /* 31706aad120cSJose E. Roman This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3171fe97e370SBarry Smith */ 31729371c9d4SSatish Balay PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact) { 31731d098868SHong Zhang Mat B = *fact; 3174599ef60dSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 3175599ef60dSHong Zhang IS isicol; 31761d098868SHong Zhang const PetscInt *r, *ic; 31771d098868SHong Zhang PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag; 3178dc3a2fd3SHong Zhang PetscInt *bi, *bj, *bdiag, *bdiag_rev; 3179f61a948aSLisandro Dalcin PetscInt row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au; 31801d098868SHong Zhang PetscInt nlnk, *lnk; 31811d098868SHong Zhang PetscBT lnkbt; 3182ace3abfcSBarry Smith PetscBool row_identity, icol_identity; 31838aee2decSHong Zhang MatScalar *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp; 31841d098868SHong Zhang const PetscInt *ics; 31851d098868SHong Zhang PetscInt j, nz, *pj, *bjtmp, k, ncut, *jtmp; 3186a2ea699eSBarry Smith PetscReal dt = info->dt, shift = info->shiftamount; 3187f61a948aSLisandro Dalcin PetscInt dtcount = (PetscInt)info->dtcount, nnz_max; 3188ace3abfcSBarry Smith PetscBool missing; 3189599ef60dSHong Zhang 3190599ef60dSHong Zhang PetscFunctionBegin; 3191f61a948aSLisandro Dalcin if (dt == PETSC_DEFAULT) dt = 0.005; 3192f61a948aSLisandro Dalcin if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax); 3193f61a948aSLisandro Dalcin 31941d098868SHong Zhang /* ------- symbolic factorization, can be reused ---------*/ 31959566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 319628b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 31971d098868SHong Zhang adiag = a->diag; 3198599ef60dSHong Zhang 31999566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 3200599ef60dSHong Zhang 3201599ef60dSHong Zhang /* bdiag is location of diagonal in factor */ 32029566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); /* becomes b->diag */ 32039566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */ 3204599ef60dSHong Zhang 32051d098868SHong Zhang /* allocate row pointers bi */ 32069566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * n + 2, &bi)); 32071d098868SHong Zhang 32081d098868SHong Zhang /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3209393d3a68SHong Zhang if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */ 32101d098868SHong Zhang nnz_max = ai[n] + 2 * n * dtcount + 2; 32118fc3a347SHong Zhang 32129566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz_max + 1, &bj)); 32139566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz_max + 1, &ba)); 3214599ef60dSHong Zhang 3215599ef60dSHong Zhang /* put together the new matrix */ 32169566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 32179566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)isicol)); 3218f284f12aSHong Zhang b = (Mat_SeqAIJ *)B->data; 32192205254eSKarl Rupp 3220599ef60dSHong Zhang b->free_a = PETSC_TRUE; 3221599ef60dSHong Zhang b->free_ij = PETSC_TRUE; 3222599ef60dSHong Zhang b->singlemalloc = PETSC_FALSE; 32232205254eSKarl Rupp 3224599ef60dSHong Zhang b->a = ba; 3225599ef60dSHong Zhang b->j = bj; 3226599ef60dSHong Zhang b->i = bi; 3227599ef60dSHong Zhang b->diag = bdiag; 3228f4259b30SLisandro Dalcin b->ilen = NULL; 3229f4259b30SLisandro Dalcin b->imax = NULL; 3230599ef60dSHong Zhang b->row = isrow; 3231599ef60dSHong Zhang b->col = iscol; 32329566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 32339566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 3234599ef60dSHong Zhang b->icol = isicol; 3235599ef60dSHong Zhang 32369566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 32379566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)B, nnz_max * (sizeof(PetscInt) + sizeof(MatScalar)))); 32381d098868SHong Zhang b->maxnz = nnz_max; 3239599ef60dSHong Zhang 3240d5f3da31SBarry Smith B->factortype = MAT_FACTOR_ILUDT; 3241f284f12aSHong Zhang B->info.factor_mallocs = 0; 3242f284f12aSHong Zhang B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]); 32431d098868SHong Zhang /* ------- end of symbolic factorization ---------*/ 3244599ef60dSHong Zhang 32459566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 32469566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 3247599ef60dSHong Zhang ics = ic; 3248599ef60dSHong Zhang 3249599ef60dSHong Zhang /* linked list for storing column indices of the active row */ 3250599ef60dSHong Zhang nlnk = n + 1; 32519566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 32521d098868SHong Zhang 32531d098868SHong Zhang /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 32549566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n, &im, n, &jtmp)); 32551d098868SHong Zhang /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 32569566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp)); 32579566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(rtmp, n)); 3258599ef60dSHong Zhang 3259599ef60dSHong Zhang bi[0] = 0; 32608fc3a347SHong Zhang bdiag[0] = nnz_max - 1; /* location of diag[0] in factor B */ 3261dc3a2fd3SHong Zhang bdiag_rev[n] = bdiag[0]; 32628fc3a347SHong Zhang bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */ 3263599ef60dSHong Zhang for (i = 0; i < n; i++) { 3264599ef60dSHong Zhang /* copy initial fill into linked list */ 32658369b28dSHong Zhang nzi = ai[r[i] + 1] - ai[r[i]]; 326694bad497SJacob Faibussowitsch PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 32678369b28dSHong Zhang nzi_al = adiag[r[i]] - ai[r[i]]; 32688369b28dSHong Zhang nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1; 3269599ef60dSHong Zhang ajtmp = aj + ai[r[i]]; 32709566063dSJacob Faibussowitsch PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 3271599ef60dSHong Zhang 3272599ef60dSHong Zhang /* load in initial (unfactored row) */ 32731d098868SHong Zhang aatmp = a->a + ai[r[i]]; 32749371c9d4SSatish Balay for (j = 0; j < nzi; j++) { rtmp[ics[*ajtmp++]] = *aatmp++; } 3275599ef60dSHong Zhang 3276599ef60dSHong Zhang /* add pivot rows into linked list */ 3277599ef60dSHong Zhang row = lnk[n]; 3278599ef60dSHong Zhang while (row < i) { 32791d098868SHong Zhang nzi_bl = bi[row + 1] - bi[row] + 1; 32801d098868SHong Zhang bjtmp = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */ 32819566063dSJacob Faibussowitsch PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im)); 3282599ef60dSHong Zhang nzi += nlnk; 3283599ef60dSHong Zhang row = lnk[row]; 3284599ef60dSHong Zhang } 3285599ef60dSHong Zhang 32868369b28dSHong Zhang /* copy data from lnk into jtmp, then initialize lnk */ 32879566063dSJacob Faibussowitsch PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt)); 3288599ef60dSHong Zhang 3289599ef60dSHong Zhang /* numerical factorization */ 32908369b28dSHong Zhang bjtmp = jtmp; 3291599ef60dSHong Zhang row = *bjtmp++; /* 1st pivot row */ 3292599ef60dSHong Zhang while (row < i) { 3293599ef60dSHong Zhang pc = rtmp + row; 32943c2a7987SHong Zhang pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 32953c2a7987SHong Zhang multiplier = (*pc) * (*pv); 3296599ef60dSHong Zhang *pc = multiplier; 32973c2a7987SHong Zhang if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 32981d098868SHong Zhang pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 32991d098868SHong Zhang pv = ba + bdiag[row + 1] + 1; 33001d098868SHong Zhang nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 33011d098868SHong Zhang for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 33029566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 3303599ef60dSHong Zhang } 3304599ef60dSHong Zhang row = *bjtmp++; 3305599ef60dSHong Zhang } 3306599ef60dSHong Zhang 33078369b28dSHong Zhang /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 33088aee2decSHong Zhang diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 33099371c9d4SSatish Balay nzi_bl = 0; 33109371c9d4SSatish Balay j = 0; 33118369b28dSHong Zhang while (jtmp[j] < i) { /* Note: jtmp is sorted */ 33129371c9d4SSatish Balay vtmp[j] = rtmp[jtmp[j]]; 33139371c9d4SSatish Balay rtmp[jtmp[j]] = 0.0; 33149371c9d4SSatish Balay nzi_bl++; 33159371c9d4SSatish Balay j++; 33168369b28dSHong Zhang } 33178369b28dSHong Zhang nzi_bu = nzi - nzi_bl - 1; 33188369b28dSHong Zhang while (j < nzi) { 33199371c9d4SSatish Balay vtmp[j] = rtmp[jtmp[j]]; 33209371c9d4SSatish Balay rtmp[jtmp[j]] = 0.0; 33218369b28dSHong Zhang j++; 33228369b28dSHong Zhang } 33238369b28dSHong Zhang 3324599ef60dSHong Zhang bjtmp = bj + bi[i]; 3325599ef60dSHong Zhang batmp = ba + bi[i]; 33268369b28dSHong Zhang /* apply level dropping rule to L part */ 33278369b28dSHong Zhang ncut = nzi_al + dtcount; 33288369b28dSHong Zhang if (ncut < nzi_bl) { 33299566063dSJacob Faibussowitsch PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp)); 33309566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp)); 3331599ef60dSHong Zhang } else { 33328369b28dSHong Zhang ncut = nzi_bl; 33338369b28dSHong Zhang } 33348369b28dSHong Zhang for (j = 0; j < ncut; j++) { 33358369b28dSHong Zhang bjtmp[j] = jtmp[j]; 33368369b28dSHong Zhang batmp[j] = vtmp[j]; 33378369b28dSHong Zhang } 33381d098868SHong Zhang bi[i + 1] = bi[i] + ncut; 33398369b28dSHong Zhang nzi = ncut + 1; 33408369b28dSHong Zhang 33418369b28dSHong Zhang /* apply level dropping rule to U part */ 33428369b28dSHong Zhang ncut = nzi_au + dtcount; 33438369b28dSHong Zhang if (ncut < nzi_bu) { 33449566063dSJacob Faibussowitsch PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1)); 33459566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1)); 33468369b28dSHong Zhang } else { 33478369b28dSHong Zhang ncut = nzi_bu; 33488369b28dSHong Zhang } 33498369b28dSHong Zhang nzi += ncut; 33501d098868SHong Zhang 33511d098868SHong Zhang /* mark bdiagonal */ 33521d098868SHong Zhang bdiag[i + 1] = bdiag[i] - (ncut + 1); 3353dc3a2fd3SHong Zhang bdiag_rev[n - i - 1] = bdiag[i + 1]; 33548fc3a347SHong Zhang bi[2 * n - i] = bi[2 * n - i + 1] - (ncut + 1); 33551d098868SHong Zhang bjtmp = bj + bdiag[i]; 33561d098868SHong Zhang batmp = ba + bdiag[i]; 33571d098868SHong Zhang *bjtmp = i; 33588aee2decSHong Zhang *batmp = diag_tmp; /* rtmp[i]; */ 33599371c9d4SSatish Balay if (*batmp == 0.0) { *batmp = dt + shift; } 3360a5b23f4aSJose E. Roman *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */ 33611d098868SHong Zhang 33621d098868SHong Zhang bjtmp = bj + bdiag[i + 1] + 1; 33631d098868SHong Zhang batmp = ba + bdiag[i + 1] + 1; 33648369b28dSHong Zhang for (k = 0; k < ncut; k++) { 33651d098868SHong Zhang bjtmp[k] = jtmp[nzi_bl + 1 + k]; 33661d098868SHong Zhang batmp[k] = vtmp[nzi_bl + 1 + k]; 3367599ef60dSHong Zhang } 3368599ef60dSHong Zhang 3369599ef60dSHong Zhang im[i] = nzi; /* used by PetscLLAddSortedLU() */ 33708369b28dSHong Zhang } /* for (i=0; i<n; i++) */ 337163a3b9bcSJacob Faibussowitsch PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]); 3372599ef60dSHong Zhang 33739566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 33749566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 3375599ef60dSHong Zhang 33769566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 33779566063dSJacob Faibussowitsch PetscCall(PetscFree2(im, jtmp)); 33789566063dSJacob Faibussowitsch PetscCall(PetscFree2(rtmp, vtmp)); 33799566063dSJacob Faibussowitsch PetscCall(PetscFree(bdiag_rev)); 3380599ef60dSHong Zhang 33819566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(B->cmap->n)); 33821d098868SHong Zhang b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3383599ef60dSHong Zhang 33849566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 33859566063dSJacob Faibussowitsch PetscCall(ISIdentity(isicol, &icol_identity)); 3386599ef60dSHong Zhang if (row_identity && icol_identity) { 338735233d99SShri Abhyankar B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3388599ef60dSHong Zhang } else { 338935233d99SShri Abhyankar B->ops->solve = MatSolve_SeqAIJ; 3390599ef60dSHong Zhang } 3391599ef60dSHong Zhang 3392f4259b30SLisandro Dalcin B->ops->solveadd = NULL; 3393f4259b30SLisandro Dalcin B->ops->solvetranspose = NULL; 3394f4259b30SLisandro Dalcin B->ops->solvetransposeadd = NULL; 3395f4259b30SLisandro Dalcin B->ops->matsolve = NULL; 3396599ef60dSHong Zhang B->assembled = PETSC_TRUE; 3397599ef60dSHong Zhang B->preallocated = PETSC_TRUE; 3398599ef60dSHong Zhang PetscFunctionReturn(0); 3399599ef60dSHong Zhang } 3400599ef60dSHong Zhang 34013c2a7987SHong Zhang /* a wraper of MatILUDTFactor_SeqAIJ() */ 3402fe97e370SBarry Smith /* 34036aad120cSJose E. Roman This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3404fe97e370SBarry Smith */ 3405fe97e370SBarry Smith 34069371c9d4SSatish Balay PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) { 34073c2a7987SHong Zhang PetscFunctionBegin; 34089566063dSJacob Faibussowitsch PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact)); 34093c2a7987SHong Zhang PetscFunctionReturn(0); 34103c2a7987SHong Zhang } 34113c2a7987SHong Zhang 34123c2a7987SHong Zhang /* 34133c2a7987SHong Zhang same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 34143c2a7987SHong Zhang - intend to replace existing MatLUFactorNumeric_SeqAIJ() 34153c2a7987SHong Zhang */ 3416fe97e370SBarry Smith /* 34176aad120cSJose E. Roman This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3418fe97e370SBarry Smith */ 3419fe97e370SBarry Smith 34209371c9d4SSatish Balay PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info) { 34213c2a7987SHong Zhang Mat C = fact; 34223c2a7987SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 34233c2a7987SHong Zhang IS isrow = b->row, isicol = b->icol; 34243c2a7987SHong Zhang const PetscInt *r, *ic, *ics; 3425b78a477dSHong Zhang PetscInt i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 3426b78a477dSHong Zhang PetscInt *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj; 34273c2a7987SHong Zhang MatScalar *rtmp, *pc, multiplier, *v, *pv, *aa = a->a; 3428182b8fbaSHong Zhang PetscReal dt = info->dt, shift = info->shiftamount; 3429ace3abfcSBarry Smith PetscBool row_identity, col_identity; 34303c2a7987SHong Zhang 34313c2a7987SHong Zhang PetscFunctionBegin; 34329566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 34339566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 34349566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp)); 34353c2a7987SHong Zhang ics = ic; 34363c2a7987SHong Zhang 34373c2a7987SHong Zhang for (i = 0; i < n; i++) { 3438b78a477dSHong Zhang /* initialize rtmp array */ 3439b78a477dSHong Zhang nzl = bi[i + 1] - bi[i]; /* num of nozeros in L(i,:) */ 34403c2a7987SHong Zhang bjtmp = bj + bi[i]; 3441b78a477dSHong Zhang for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0; 3442b78a477dSHong Zhang rtmp[i] = 0.0; 3443b78a477dSHong Zhang nzu = bdiag[i] - bdiag[i + 1]; /* num of nozeros in U(i,:) */ 3444b78a477dSHong Zhang bjtmp = bj + bdiag[i + 1] + 1; 3445b78a477dSHong Zhang for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0; 34463c2a7987SHong Zhang 3447b78a477dSHong Zhang /* load in initial unfactored row of A */ 34483c2a7987SHong Zhang nz = ai[r[i] + 1] - ai[r[i]]; 34493c2a7987SHong Zhang ajtmp = aj + ai[r[i]]; 34503c2a7987SHong Zhang v = aa + ai[r[i]]; 34519371c9d4SSatish Balay for (j = 0; j < nz; j++) { rtmp[ics[*ajtmp++]] = v[j]; } 34523c2a7987SHong Zhang 3453b78a477dSHong Zhang /* numerical factorization */ 3454b78a477dSHong Zhang bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3455b78a477dSHong Zhang nzl = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */ 3456b78a477dSHong Zhang k = 0; 3457b78a477dSHong Zhang while (k < nzl) { 34583c2a7987SHong Zhang row = *bjtmp++; 34593c2a7987SHong Zhang pc = rtmp + row; 3460b78a477dSHong Zhang pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3461b78a477dSHong Zhang multiplier = (*pc) * (*pv); 34623c2a7987SHong Zhang *pc = multiplier; 3463b78a477dSHong Zhang if (PetscAbsScalar(multiplier) > dt) { 3464b78a477dSHong Zhang pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 3465b78a477dSHong Zhang pv = b->a + bdiag[row + 1] + 1; 3466b78a477dSHong Zhang nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3467b78a477dSHong Zhang for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 34689566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 34693c2a7987SHong Zhang } 3470b78a477dSHong Zhang k++; 34713c2a7987SHong Zhang } 34723c2a7987SHong Zhang 3473b78a477dSHong Zhang /* finished row so stick it into b->a */ 3474b78a477dSHong Zhang /* L-part */ 3475b78a477dSHong Zhang pv = b->a + bi[i]; 3476b78a477dSHong Zhang pj = bj + bi[i]; 3477b78a477dSHong Zhang nzl = bi[i + 1] - bi[i]; 34789371c9d4SSatish Balay for (j = 0; j < nzl; j++) { pv[j] = rtmp[pj[j]]; } 3479b78a477dSHong Zhang 3480a5b23f4aSJose E. Roman /* diagonal: invert diagonal entries for simpler triangular solves */ 3481b78a477dSHong Zhang if (rtmp[i] == 0.0) rtmp[i] = dt + shift; 3482b78a477dSHong Zhang b->a[bdiag[i]] = 1.0 / rtmp[i]; 3483b78a477dSHong Zhang 3484b78a477dSHong Zhang /* U-part */ 3485b78a477dSHong Zhang pv = b->a + bdiag[i + 1] + 1; 3486b78a477dSHong Zhang pj = bj + bdiag[i + 1] + 1; 3487b78a477dSHong Zhang nzu = bdiag[i] - bdiag[i + 1] - 1; 34889371c9d4SSatish Balay for (j = 0; j < nzu; j++) { pv[j] = rtmp[pj[j]]; } 3489b78a477dSHong Zhang } 3490b78a477dSHong Zhang 34919566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 34929566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 34939566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 3494b78a477dSHong Zhang 34959566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 34969566063dSJacob Faibussowitsch PetscCall(ISIdentity(isicol, &col_identity)); 34973c2a7987SHong Zhang if (row_identity && col_identity) { 349835233d99SShri Abhyankar C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 34993c2a7987SHong Zhang } else { 350035233d99SShri Abhyankar C->ops->solve = MatSolve_SeqAIJ; 35013c2a7987SHong Zhang } 3502f4259b30SLisandro Dalcin C->ops->solveadd = NULL; 3503f4259b30SLisandro Dalcin C->ops->solvetranspose = NULL; 3504f4259b30SLisandro Dalcin C->ops->solvetransposeadd = NULL; 3505f4259b30SLisandro Dalcin C->ops->matsolve = NULL; 35063c2a7987SHong Zhang C->assembled = PETSC_TRUE; 35073c2a7987SHong Zhang C->preallocated = PETSC_TRUE; 35082205254eSKarl Rupp 35099566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->cmap->n)); 35103c2a7987SHong Zhang PetscFunctionReturn(0); 35113c2a7987SHong Zhang } 3512