1b377110cSBarry Smith 2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> 3c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h> 4c6db04a5SJed Brown #include <petscbt.h> 5c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 63b2fbd54SBarry Smith 78fa24674SBarry Smith /* 88fa24674SBarry Smith Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 9312e372aSBarry Smith 10312e372aSBarry Smith This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 118fa24674SBarry Smith */ 129371c9d4SSatish Balay PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol) { 138fa24674SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->data; 14889a8dedSBarry Smith PetscInt i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order; 158fa24674SBarry Smith const PetscInt *ai = a->i, *aj = a->j; 168fa24674SBarry Smith const PetscScalar *aa = a->a; 17ace3abfcSBarry Smith PetscBool *done; 18889a8dedSBarry Smith PetscReal best, past = 0, future; 193a40ed3dSBarry Smith 208fa24674SBarry Smith PetscFunctionBegin; 218fa24674SBarry Smith /* pick initial row */ 228fa24674SBarry Smith best = -1; 238fa24674SBarry Smith for (i = 0; i < n; i++) { 2475567043SBarry Smith future = 0.0; 258fa24674SBarry Smith for (j = ai[i]; j < ai[i + 1]; j++) { 26db4deed7SKarl Rupp if (aj[j] != i) future += PetscAbsScalar(aa[j]); 27db4deed7SKarl Rupp else past = PetscAbsScalar(aa[j]); 288fa24674SBarry Smith } 298fa24674SBarry Smith if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 308fa24674SBarry Smith if (past / future > best) { 318fa24674SBarry Smith best = past / future; 328fa24674SBarry Smith current = i; 338fa24674SBarry Smith } 348fa24674SBarry Smith } 358fa24674SBarry Smith 369566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &done)); 379566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(done, n)); 389566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &order)); 398fa24674SBarry Smith order[0] = current; 408fa24674SBarry Smith for (i = 0; i < n - 1; i++) { 418fa24674SBarry Smith done[current] = PETSC_TRUE; 428fa24674SBarry Smith best = -1; 438fa24674SBarry Smith /* loop over all neighbors of current pivot */ 448fa24674SBarry Smith for (j = ai[current]; j < ai[current + 1]; j++) { 458fa24674SBarry Smith jj = aj[j]; 468fa24674SBarry Smith if (done[jj]) continue; 478fa24674SBarry Smith /* loop over columns of potential next row computing weights for below and above diagonal */ 488fa24674SBarry Smith past = future = 0.0; 498fa24674SBarry Smith for (k = ai[jj]; k < ai[jj + 1]; k++) { 508fa24674SBarry Smith kk = aj[k]; 518fa24674SBarry Smith if (done[kk]) past += PetscAbsScalar(aa[k]); 528fa24674SBarry Smith else if (kk != jj) future += PetscAbsScalar(aa[k]); 538fa24674SBarry Smith } 548fa24674SBarry Smith if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 558fa24674SBarry Smith if (past / future > best) { 568fa24674SBarry Smith best = past / future; 578fa24674SBarry Smith newcurrent = jj; 588fa24674SBarry Smith } 598fa24674SBarry Smith } 608fa24674SBarry Smith if (best == -1) { /* no neighbors to select from so select best of all that remain */ 618fa24674SBarry Smith best = -1; 628fa24674SBarry Smith for (k = 0; k < n; k++) { 638fa24674SBarry Smith if (done[k]) continue; 6475567043SBarry Smith future = 0.0; 6575567043SBarry Smith past = 0.0; 668fa24674SBarry Smith for (j = ai[k]; j < ai[k + 1]; j++) { 678fa24674SBarry Smith kk = aj[j]; 688fa24674SBarry Smith if (done[kk]) past += PetscAbsScalar(aa[j]); 698fa24674SBarry Smith else if (kk != k) future += PetscAbsScalar(aa[j]); 708fa24674SBarry Smith } 718fa24674SBarry Smith if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 728fa24674SBarry Smith if (past / future > best) { 738fa24674SBarry Smith best = past / future; 748fa24674SBarry Smith newcurrent = k; 758fa24674SBarry Smith } 768fa24674SBarry Smith } 778fa24674SBarry Smith } 7808401ef6SPierre Jolivet PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current"); 798fa24674SBarry Smith current = newcurrent; 808fa24674SBarry Smith order[i + 1] = current; 818fa24674SBarry Smith } 829566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow)); 838fa24674SBarry Smith *icol = *irow; 849566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)*irow)); 859566063dSJacob Faibussowitsch PetscCall(PetscFree(done)); 869566063dSJacob Faibussowitsch PetscCall(PetscFree(order)); 87d64ed03dSBarry Smith PetscFunctionReturn(0); 883b2fbd54SBarry Smith } 893b2fbd54SBarry Smith 909371c9d4SSatish Balay static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type) { 914ac6704cSBarry Smith PetscFunctionBegin; 924ac6704cSBarry Smith *type = MATSOLVERPETSC; 934ac6704cSBarry Smith PetscFunctionReturn(0); 944ac6704cSBarry Smith } 954ac6704cSBarry Smith 969371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B) { 97d0f46423SBarry Smith PetscInt n = A->rmap->n; 98b24902e0SBarry Smith 99b24902e0SBarry Smith PetscFunctionBegin; 100891bceaeSHong Zhang #if defined(PETSC_USE_COMPLEX) 101b94d7dedSBarry Smith PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || (ftype != MAT_FACTOR_CHOLESKY && ftype != MAT_FACTOR_ICC), PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY or ICC Factor is not supported"); 102891bceaeSHong Zhang #endif 1039566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 1049566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*B, n, n, n, n)); 105599ef60dSHong Zhang if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 1069566063dSJacob Faibussowitsch PetscCall(MatSetType(*B, MATSEQAIJ)); 1072205254eSKarl Rupp 10835233d99SShri Abhyankar (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 10935233d99SShri Abhyankar (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 1102205254eSKarl Rupp 1119566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(*B, A, A)); 1129566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU])); 1139566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU])); 1149566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT])); 115b24902e0SBarry Smith } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 1169566063dSJacob Faibussowitsch PetscCall(MatSetType(*B, MATSEQSBAIJ)); 1179566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL)); 1182205254eSKarl Rupp 11935233d99SShri Abhyankar (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 12035233d99SShri Abhyankar (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 1219566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY])); 1229566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC])); 123e32f2f54SBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported"); 124d5f3da31SBarry Smith (*B)->factortype = ftype; 12500c67f3bSHong Zhang 1269566063dSJacob Faibussowitsch PetscCall(PetscFree((*B)->solvertype)); 1279566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype)); 128f73b0415SBarry Smith (*B)->canuseordering = PETSC_TRUE; 1299566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc)); 130b24902e0SBarry Smith PetscFunctionReturn(0); 131b24902e0SBarry Smith } 132b24902e0SBarry Smith 1339371c9d4SSatish Balay PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) { 134416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 135289bc588SBarry Smith IS isicol; 1365d0c19d7SBarry Smith const PetscInt *r, *ic; 1375d0c19d7SBarry Smith PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j; 1381393bc97SHong Zhang PetscInt *bi, *bj, *ajtmp; 1391393bc97SHong Zhang PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 1409e046878SBarry Smith PetscReal f; 1414875ba38SHong Zhang PetscInt nlnk, *lnk, k, **bi_ptr; 1420298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 1431393bc97SHong Zhang PetscBT lnkbt; 144ece542f9SHong Zhang PetscBool missing; 145289bc588SBarry Smith 1463a40ed3dSBarry Smith PetscFunctionBegin; 14708401ef6SPierre Jolivet PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 1489566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 14928b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 150ece542f9SHong Zhang 1519566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1529566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 1539566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 154289bc588SBarry Smith 155289bc588SBarry Smith /* get new row pointers */ 1569566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bi)); 1571393bc97SHong Zhang bi[0] = 0; 1581393bc97SHong Zhang 1591393bc97SHong Zhang /* bdiag is location of diagonal in factor */ 1609566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 1611393bc97SHong Zhang bdiag[0] = 0; 1621393bc97SHong Zhang 1631393bc97SHong Zhang /* linked list for storing column indices of the active row */ 1641393bc97SHong Zhang nlnk = n + 1; 1659566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 1661393bc97SHong Zhang 1679566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 1681393bc97SHong Zhang 1691393bc97SHong Zhang /* initial FreeSpace size is f*(ai[n]+1) */ 170d3d32019SBarry Smith f = info->fill; 171aa91b3e7SRichard Tran Mills if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 1729566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1731393bc97SHong Zhang current_space = free_space; 174289bc588SBarry Smith 175289bc588SBarry Smith for (i = 0; i < n; i++) { 1761393bc97SHong Zhang /* copy previous fill into linked list */ 177289bc588SBarry Smith nzi = 0; 1781393bc97SHong Zhang nnz = ai[r[i] + 1] - ai[r[i]]; 1791393bc97SHong Zhang ajtmp = aj + ai[r[i]]; 1809566063dSJacob Faibussowitsch PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 1811393bc97SHong Zhang nzi += nlnk; 1821393bc97SHong Zhang 1831393bc97SHong Zhang /* add pivot rows into linked list */ 1841393bc97SHong Zhang row = lnk[n]; 1851393bc97SHong Zhang while (row < i) { 186d1170560SHong Zhang nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 187d1170560SHong Zhang ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 1889566063dSJacob Faibussowitsch PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 1891393bc97SHong Zhang nzi += nlnk; 1901393bc97SHong Zhang row = lnk[row]; 191289bc588SBarry Smith } 1921393bc97SHong Zhang bi[i + 1] = bi[i] + nzi; 1931393bc97SHong Zhang im[i] = nzi; 1941393bc97SHong Zhang 1951393bc97SHong Zhang /* mark bdiag */ 1961393bc97SHong Zhang nzbd = 0; 1971393bc97SHong Zhang nnz = nzi; 1981393bc97SHong Zhang k = lnk[n]; 1991393bc97SHong Zhang while (nnz-- && k < i) { 2001393bc97SHong Zhang nzbd++; 2011393bc97SHong Zhang k = lnk[k]; 2021393bc97SHong Zhang } 2031393bc97SHong Zhang bdiag[i] = bi[i] + nzbd; 2041393bc97SHong Zhang 2051393bc97SHong Zhang /* if free space is not available, make more free space */ 2061393bc97SHong Zhang if (current_space->local_remaining < nzi) { 207f91af8c7SBarry Smith nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */ 2089566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 2091393bc97SHong Zhang reallocs++; 2101393bc97SHong Zhang } 2111393bc97SHong Zhang 2121393bc97SHong Zhang /* copy data into free space, then initialize lnk */ 2139566063dSJacob Faibussowitsch PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 2142205254eSKarl Rupp 2151393bc97SHong Zhang bi_ptr[i] = current_space->array; 2161393bc97SHong Zhang current_space->array += nzi; 2171393bc97SHong Zhang current_space->local_used += nzi; 2181393bc97SHong Zhang current_space->local_remaining -= nzi; 219289bc588SBarry Smith } 2206cf91177SBarry Smith #if defined(PETSC_USE_INFO) 221464e8d44SSatish Balay if (ai[n] != 0) { 2221393bc97SHong Zhang PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 2239566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 2249566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2259566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 2269566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 22705bf559cSSatish Balay } else { 2289566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 22905bf559cSSatish Balay } 23063ba0a88SBarry Smith #endif 2312fb3ed76SBarry Smith 2329566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 2339566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 2341987afe7SBarry Smith 2351393bc97SHong Zhang /* destroy list of free space and other temporary array(s) */ 2369566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 2379566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 2389566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 2399566063dSJacob Faibussowitsch PetscCall(PetscFree2(bi_ptr, im)); 240289bc588SBarry Smith 241289bc588SBarry Smith /* put together the new matrix */ 2429566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 243719d5645SBarry Smith b = (Mat_SeqAIJ *)(B)->data; 2442205254eSKarl Rupp 245e6b907acSBarry Smith b->free_a = PETSC_TRUE; 246e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 2477c922b88SBarry Smith b->singlemalloc = PETSC_FALSE; 2482205254eSKarl Rupp 2499566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[n] + 1, &b->a)); 2501393bc97SHong Zhang b->j = bj; 2511393bc97SHong Zhang b->i = bi; 2521393bc97SHong Zhang b->diag = bdiag; 253f4259b30SLisandro Dalcin b->ilen = NULL; 254f4259b30SLisandro Dalcin b->imax = NULL; 255416022c9SBarry Smith b->row = isrow; 256416022c9SBarry Smith b->col = iscol; 2579566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 2589566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 25982bf6240SBarry Smith b->icol = isicol; 2609566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 2611393bc97SHong Zhang 2621393bc97SHong Zhang /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 2631393bc97SHong Zhang b->maxnz = b->nz = bi[n]; 2647fd98bd6SLois Curfman McInnes 265d5f3da31SBarry Smith (B)->factortype = MAT_FACTOR_LU; 266719d5645SBarry Smith (B)->info.factor_mallocs = reallocs; 267719d5645SBarry Smith (B)->info.fill_ratio_given = f; 268703deb49SSatish Balay 2699f7d0b68SHong Zhang if (ai[n]) { 270719d5645SBarry Smith (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 271eea2913aSSatish Balay } else { 272719d5645SBarry Smith (B)->info.fill_ratio_needed = 0.0; 273eea2913aSSatish Balay } 274ad04f41aSHong Zhang (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 275ad540459SPierre Jolivet if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2763a40ed3dSBarry Smith PetscFunctionReturn(0); 277289bc588SBarry Smith } 2781393bc97SHong Zhang 2799371c9d4SSatish Balay PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) { 280ce3d78c0SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 281ce3d78c0SShri Abhyankar IS isicol; 2828758e1faSBarry Smith const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp; 2838758e1faSBarry Smith PetscInt i, n = A->rmap->n; 2848758e1faSBarry Smith PetscInt *bi, *bj; 285ce3d78c0SShri Abhyankar PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 286ce3d78c0SShri Abhyankar PetscReal f; 287ce3d78c0SShri Abhyankar PetscInt nlnk, *lnk, k, **bi_ptr; 2880298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 289ce3d78c0SShri Abhyankar PetscBT lnkbt; 290ece542f9SHong Zhang PetscBool missing; 291ce3d78c0SShri Abhyankar 292ce3d78c0SShri Abhyankar PetscFunctionBegin; 29308401ef6SPierre Jolivet PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 2949566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 29528b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 296ece542f9SHong Zhang 2979566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 2989566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 2999566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 300ce3d78c0SShri Abhyankar 3011bfa06eaSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 3029566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bi)); 3039566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 304a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 3059b48462bSShri Abhyankar 3069b48462bSShri Abhyankar /* linked list for storing column indices of the active row */ 3079b48462bSShri Abhyankar nlnk = n + 1; 3089566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 3099b48462bSShri Abhyankar 3109566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 3119b48462bSShri Abhyankar 3129b48462bSShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 3139b48462bSShri Abhyankar f = info->fill; 314aa91b3e7SRichard Tran Mills if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 3159566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 3169b48462bSShri Abhyankar current_space = free_space; 3179b48462bSShri Abhyankar 3189b48462bSShri Abhyankar for (i = 0; i < n; i++) { 3199b48462bSShri Abhyankar /* copy previous fill into linked list */ 3209b48462bSShri Abhyankar nzi = 0; 3219b48462bSShri Abhyankar nnz = ai[r[i] + 1] - ai[r[i]]; 3229b48462bSShri Abhyankar ajtmp = aj + ai[r[i]]; 3239566063dSJacob Faibussowitsch PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 3249b48462bSShri Abhyankar nzi += nlnk; 3259b48462bSShri Abhyankar 3269b48462bSShri Abhyankar /* add pivot rows into linked list */ 3279b48462bSShri Abhyankar row = lnk[n]; 3289b48462bSShri Abhyankar while (row < i) { 3299b48462bSShri Abhyankar nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 3309b48462bSShri Abhyankar ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 3319566063dSJacob Faibussowitsch PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 3329b48462bSShri Abhyankar nzi += nlnk; 3339b48462bSShri Abhyankar row = lnk[row]; 3349b48462bSShri Abhyankar } 3359b48462bSShri Abhyankar bi[i + 1] = bi[i] + nzi; 3369b48462bSShri Abhyankar im[i] = nzi; 3379b48462bSShri Abhyankar 3389b48462bSShri Abhyankar /* mark bdiag */ 3399b48462bSShri Abhyankar nzbd = 0; 3409b48462bSShri Abhyankar nnz = nzi; 3419b48462bSShri Abhyankar k = lnk[n]; 3429b48462bSShri Abhyankar while (nnz-- && k < i) { 3439b48462bSShri Abhyankar nzbd++; 3449b48462bSShri Abhyankar k = lnk[k]; 3459b48462bSShri Abhyankar } 3469b48462bSShri Abhyankar bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 3479b48462bSShri Abhyankar 3489b48462bSShri Abhyankar /* if free space is not available, make more free space */ 3499b48462bSShri Abhyankar if (current_space->local_remaining < nzi) { 3502f18eb33SBarry Smith /* estimated additional space needed */ 3511df4278eSBarry Smith nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi)); 3529566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 3539b48462bSShri Abhyankar reallocs++; 3549b48462bSShri Abhyankar } 3559b48462bSShri Abhyankar 3569b48462bSShri Abhyankar /* copy data into free space, then initialize lnk */ 3579566063dSJacob Faibussowitsch PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 3582205254eSKarl Rupp 3599b48462bSShri Abhyankar bi_ptr[i] = current_space->array; 3609b48462bSShri Abhyankar current_space->array += nzi; 3619b48462bSShri Abhyankar current_space->local_used += nzi; 3629b48462bSShri Abhyankar current_space->local_remaining -= nzi; 3639b48462bSShri Abhyankar } 3649b48462bSShri Abhyankar 3659566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 3669566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 3679b48462bSShri Abhyankar 3689263d837SHong Zhang /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 3699566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 3709566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 3719566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 3729566063dSJacob Faibussowitsch PetscCall(PetscFree2(bi_ptr, im)); 3739b48462bSShri Abhyankar 3749b48462bSShri Abhyankar /* put together the new matrix */ 3759566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 3769b48462bSShri Abhyankar b = (Mat_SeqAIJ *)(B)->data; 3772205254eSKarl Rupp 3789b48462bSShri Abhyankar b->free_a = PETSC_TRUE; 3799b48462bSShri Abhyankar b->free_ij = PETSC_TRUE; 3809b48462bSShri Abhyankar b->singlemalloc = PETSC_FALSE; 3812205254eSKarl Rupp 3829566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a)); 3832205254eSKarl Rupp 3849b48462bSShri Abhyankar b->j = bj; 3859b48462bSShri Abhyankar b->i = bi; 3869b48462bSShri Abhyankar b->diag = bdiag; 387f4259b30SLisandro Dalcin b->ilen = NULL; 388f4259b30SLisandro Dalcin b->imax = NULL; 3899b48462bSShri Abhyankar b->row = isrow; 3909b48462bSShri Abhyankar b->col = iscol; 3919566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 3929566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 3939b48462bSShri Abhyankar b->icol = isicol; 3949566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 3959b48462bSShri Abhyankar 3969b48462bSShri Abhyankar /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 3979b48462bSShri Abhyankar b->maxnz = b->nz = bdiag[0] + 1; 3982205254eSKarl Rupp 399d5f3da31SBarry Smith B->factortype = MAT_FACTOR_LU; 400f284f12aSHong Zhang B->info.factor_mallocs = reallocs; 401f284f12aSHong Zhang B->info.fill_ratio_given = f; 4029b48462bSShri Abhyankar 4039b48462bSShri Abhyankar if (ai[n]) { 404f284f12aSHong Zhang B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 4059b48462bSShri Abhyankar } else { 406f284f12aSHong Zhang B->info.fill_ratio_needed = 0.0; 4079b48462bSShri Abhyankar } 4089263d837SHong Zhang #if defined(PETSC_USE_INFO) 4099263d837SHong Zhang if (ai[n] != 0) { 4109263d837SHong Zhang PetscReal af = B->info.fill_ratio_needed; 4119566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 4129566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 4139566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 4149566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 4159263d837SHong Zhang } else { 4169566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 4179263d837SHong Zhang } 4189263d837SHong Zhang #endif 41935233d99SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 420ad540459SPierre Jolivet if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 4219566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCheckInode_FactorLU(B)); 4229b48462bSShri Abhyankar PetscFunctionReturn(0); 4239b48462bSShri Abhyankar } 4249b48462bSShri Abhyankar 4255b5bc046SBarry Smith /* 4265b5bc046SBarry Smith Trouble in factorization, should we dump the original matrix? 4275b5bc046SBarry Smith */ 4289371c9d4SSatish Balay PetscErrorCode MatFactorDumpMatrix(Mat A) { 429ace3abfcSBarry Smith PetscBool flg = PETSC_FALSE; 4305b5bc046SBarry Smith 4315b5bc046SBarry Smith PetscFunctionBegin; 4329566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL)); 4335b5bc046SBarry Smith if (flg) { 4345b5bc046SBarry Smith PetscViewer viewer; 4355b5bc046SBarry Smith char filename[PETSC_MAX_PATH_LEN]; 4365b5bc046SBarry Smith 4379566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank)); 4389566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer)); 4399566063dSJacob Faibussowitsch PetscCall(MatView(A, viewer)); 4409566063dSJacob Faibussowitsch PetscCall(PetscViewerDestroy(&viewer)); 4415b5bc046SBarry Smith } 4425b5bc046SBarry Smith PetscFunctionReturn(0); 4435b5bc046SBarry Smith } 4445b5bc046SBarry Smith 4459371c9d4SSatish Balay PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) { 446c9c72f8fSHong Zhang Mat C = B; 447c9c72f8fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 448c9c72f8fSHong Zhang IS isrow = b->row, isicol = b->icol; 449c9c72f8fSHong Zhang const PetscInt *r, *ic, *ics; 450bbd65245SShri Abhyankar const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag; 451bbd65245SShri Abhyankar PetscInt i, j, k, nz, nzL, row, *pj; 452bbd65245SShri Abhyankar const PetscInt *ajtmp, *bjtmp; 453bbd65245SShri Abhyankar MatScalar *rtmp, *pc, multiplier, *pv; 454bbd65245SShri Abhyankar const MatScalar *aa = a->a, *v; 455ace3abfcSBarry Smith PetscBool row_identity, col_identity; 456d90ac83dSHong Zhang FactorShiftCtx sctx; 4574f81c4b7SBarry Smith const PetscInt *ddiag; 458d4ad06f7SHong Zhang PetscReal rs; 459d4ad06f7SHong Zhang MatScalar d; 460d4ad06f7SHong Zhang 461c9c72f8fSHong Zhang PetscFunctionBegin; 462d6e5152cSHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 4639566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 464d4ad06f7SHong Zhang 465f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 466d4ad06f7SHong Zhang ddiag = a->diag; 467d4ad06f7SHong Zhang sctx.shift_top = info->zeropivot; 468d4ad06f7SHong Zhang for (i = 0; i < n; i++) { 469d4ad06f7SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 470d4ad06f7SHong Zhang d = (aa)[ddiag[i]]; 471d4ad06f7SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 472d4ad06f7SHong Zhang v = aa + ai[i]; 473d4ad06f7SHong Zhang nz = ai[i + 1] - ai[i]; 4742205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 475d4ad06f7SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 476d4ad06f7SHong Zhang } 477d4ad06f7SHong Zhang sctx.shift_top *= 1.1; 478d4ad06f7SHong Zhang sctx.nshift_max = 5; 479d4ad06f7SHong Zhang sctx.shift_lo = 0.; 480d4ad06f7SHong Zhang sctx.shift_hi = 1.; 481d4ad06f7SHong Zhang } 482d4ad06f7SHong Zhang 4839566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 4849566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 4859566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp)); 486c9c72f8fSHong Zhang ics = ic; 487c9c72f8fSHong Zhang 488d4ad06f7SHong Zhang do { 48907b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 490c9c72f8fSHong Zhang for (i = 0; i < n; i++) { 491c9c72f8fSHong Zhang /* zero rtmp */ 492c9c72f8fSHong Zhang /* L part */ 493c9c72f8fSHong Zhang nz = bi[i + 1] - bi[i]; 494c9c72f8fSHong Zhang bjtmp = bj + bi[i]; 495c9c72f8fSHong Zhang for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 496c9c72f8fSHong Zhang 497c9c72f8fSHong Zhang /* U part */ 49862fbd6afSShri Abhyankar nz = bdiag[i] - bdiag[i + 1]; 49962fbd6afSShri Abhyankar bjtmp = bj + bdiag[i + 1] + 1; 500813a1f2bSShri Abhyankar for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 501813a1f2bSShri Abhyankar 502813a1f2bSShri Abhyankar /* load in initial (unfactored row) */ 503813a1f2bSShri Abhyankar nz = ai[r[i] + 1] - ai[r[i]]; 504813a1f2bSShri Abhyankar ajtmp = aj + ai[r[i]]; 505813a1f2bSShri Abhyankar v = aa + ai[r[i]]; 506ad540459SPierre Jolivet for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 507813a1f2bSShri Abhyankar /* ZeropivotApply() */ 50898a93161SHong Zhang rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 509813a1f2bSShri Abhyankar 510813a1f2bSShri Abhyankar /* elimination */ 511813a1f2bSShri Abhyankar bjtmp = bj + bi[i]; 512813a1f2bSShri Abhyankar row = *bjtmp++; 513813a1f2bSShri Abhyankar nzL = bi[i + 1] - bi[i]; 514f268cba8SShri Abhyankar for (k = 0; k < nzL; k++) { 515813a1f2bSShri Abhyankar pc = rtmp + row; 516813a1f2bSShri Abhyankar if (*pc != 0.0) { 517813a1f2bSShri Abhyankar pv = b->a + bdiag[row]; 518813a1f2bSShri Abhyankar multiplier = *pc * (*pv); 519813a1f2bSShri Abhyankar *pc = multiplier; 5202205254eSKarl Rupp 52162fbd6afSShri Abhyankar pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 52262fbd6afSShri Abhyankar pv = b->a + bdiag[row + 1] + 1; 52362fbd6afSShri Abhyankar nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */ 5242205254eSKarl Rupp 525813a1f2bSShri Abhyankar for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 5269566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 527813a1f2bSShri Abhyankar } 528f268cba8SShri Abhyankar row = *bjtmp++; 529813a1f2bSShri Abhyankar } 530813a1f2bSShri Abhyankar 531813a1f2bSShri Abhyankar /* finished row so stick it into b->a */ 532813a1f2bSShri Abhyankar rs = 0.0; 533813a1f2bSShri Abhyankar /* L part */ 534813a1f2bSShri Abhyankar pv = b->a + bi[i]; 535813a1f2bSShri Abhyankar pj = b->j + bi[i]; 536813a1f2bSShri Abhyankar nz = bi[i + 1] - bi[i]; 537813a1f2bSShri Abhyankar for (j = 0; j < nz; j++) { 5389371c9d4SSatish Balay pv[j] = rtmp[pj[j]]; 5399371c9d4SSatish Balay rs += PetscAbsScalar(pv[j]); 540813a1f2bSShri Abhyankar } 541813a1f2bSShri Abhyankar 542813a1f2bSShri Abhyankar /* U part */ 54362fbd6afSShri Abhyankar pv = b->a + bdiag[i + 1] + 1; 54462fbd6afSShri Abhyankar pj = b->j + bdiag[i + 1] + 1; 54562fbd6afSShri Abhyankar nz = bdiag[i] - bdiag[i + 1] - 1; 546813a1f2bSShri Abhyankar for (j = 0; j < nz; j++) { 5479371c9d4SSatish Balay pv[j] = rtmp[pj[j]]; 5489371c9d4SSatish Balay rs += PetscAbsScalar(pv[j]); 549813a1f2bSShri Abhyankar } 550813a1f2bSShri Abhyankar 551813a1f2bSShri Abhyankar sctx.rs = rs; 552813a1f2bSShri Abhyankar sctx.pv = rtmp[i]; 5539566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 55407b50cabSHong Zhang if (sctx.newshift) break; /* break for-loop */ 55507b50cabSHong Zhang rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 556813a1f2bSShri Abhyankar 557a5b23f4aSJose E. Roman /* Mark diagonal and invert diagonal for simpler triangular solves */ 558813a1f2bSShri Abhyankar pv = b->a + bdiag[i]; 559813a1f2bSShri Abhyankar *pv = 1.0 / rtmp[i]; 560813a1f2bSShri Abhyankar 561813a1f2bSShri Abhyankar } /* endof for (i=0; i<n; i++) { */ 562813a1f2bSShri Abhyankar 5638ff23777SHong Zhang /* MatPivotRefine() */ 56407b50cabSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 565813a1f2bSShri Abhyankar /* 566813a1f2bSShri Abhyankar * if no shift in this attempt & shifting & started shifting & can refine, 567813a1f2bSShri Abhyankar * then try lower shift 568813a1f2bSShri Abhyankar */ 569813a1f2bSShri Abhyankar sctx.shift_hi = sctx.shift_fraction; 570813a1f2bSShri Abhyankar sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 571813a1f2bSShri Abhyankar sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 57207b50cabSHong Zhang sctx.newshift = PETSC_TRUE; 573813a1f2bSShri Abhyankar sctx.nshift++; 574813a1f2bSShri Abhyankar } 57507b50cabSHong Zhang } while (sctx.newshift); 576813a1f2bSShri Abhyankar 5779566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 5789566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 5799566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 580d3ac4fa3SBarry Smith 5819566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 5829566063dSJacob Faibussowitsch PetscCall(ISIdentity(isicol, &col_identity)); 583abb87a52SBarry Smith if (b->inode.size) { 584abb87a52SBarry Smith C->ops->solve = MatSolve_SeqAIJ_Inode; 585abb87a52SBarry Smith } else if (row_identity && col_identity) { 58635233d99SShri Abhyankar C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 587813a1f2bSShri Abhyankar } else { 58835233d99SShri Abhyankar C->ops->solve = MatSolve_SeqAIJ; 589813a1f2bSShri Abhyankar } 59035233d99SShri Abhyankar C->ops->solveadd = MatSolveAdd_SeqAIJ; 59135233d99SShri Abhyankar C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 59235233d99SShri Abhyankar C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 59335233d99SShri Abhyankar C->ops->matsolve = MatMatSolve_SeqAIJ; 594813a1f2bSShri Abhyankar C->assembled = PETSC_TRUE; 595813a1f2bSShri Abhyankar C->preallocated = PETSC_TRUE; 5962205254eSKarl Rupp 5979566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->cmap->n)); 598813a1f2bSShri Abhyankar 59914d2772eSHong Zhang /* MatShiftView(A,info,&sctx) */ 600813a1f2bSShri Abhyankar if (sctx.nshift) { 601f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 6029566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 603f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 6049566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 605f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 6069566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 607813a1f2bSShri Abhyankar } 608813a1f2bSShri Abhyankar } 609813a1f2bSShri Abhyankar PetscFunctionReturn(0); 610813a1f2bSShri Abhyankar } 611813a1f2bSShri Abhyankar 6129371c9d4SSatish Balay PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) { 613719d5645SBarry Smith Mat C = B; 614416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 61582bf6240SBarry Smith IS isrow = b->row, isicol = b->icol; 6165d0c19d7SBarry Smith const PetscInt *r, *ic, *ics; 617d278edc7SBarry Smith PetscInt nz, row, i, j, n = A->rmap->n, diag; 618d278edc7SBarry Smith const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 619d278edc7SBarry Smith const PetscInt *ajtmp, *bjtmp, *diag_offset = b->diag, *pj; 620d278edc7SBarry Smith MatScalar *pv, *rtmp, *pc, multiplier, d; 621d278edc7SBarry Smith const MatScalar *v, *aa = a->a; 62233f9893dSHong Zhang PetscReal rs = 0.0; 623d90ac83dSHong Zhang FactorShiftCtx sctx; 6244f81c4b7SBarry Smith const PetscInt *ddiag; 625ace3abfcSBarry Smith PetscBool row_identity, col_identity; 626289bc588SBarry Smith 6273a40ed3dSBarry Smith PetscFunctionBegin; 628504a3fadSHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 6299566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 630ada7143aSSatish Balay 631f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 63242898933SHong Zhang ddiag = a->diag; 6339f7d0b68SHong Zhang sctx.shift_top = info->zeropivot; 6346cc28720Svictorle for (i = 0; i < n; i++) { 6359f95998fSHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 6369f7d0b68SHong Zhang d = (aa)[ddiag[i]]; 6371a890ab1SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 6389f7d0b68SHong Zhang v = aa + ai[i]; 6399f7d0b68SHong Zhang nz = ai[i + 1] - ai[i]; 6402205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 6411a890ab1SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 6426cc28720Svictorle } 643118f5789SBarry Smith sctx.shift_top *= 1.1; 644d2276718SHong Zhang sctx.nshift_max = 5; 645d2276718SHong Zhang sctx.shift_lo = 0.; 646d2276718SHong Zhang sctx.shift_hi = 1.; 647a0a356efSHong Zhang } 648a0a356efSHong Zhang 6499566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 6509566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 6519566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp)); 652504a3fadSHong Zhang ics = ic; 653504a3fadSHong Zhang 654085256b3SBarry Smith do { 65507b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 656289bc588SBarry Smith for (i = 0; i < n; i++) { 657b3bf805bSHong Zhang nz = bi[i + 1] - bi[i]; 658b3bf805bSHong Zhang bjtmp = bj + bi[i]; 6599f7d0b68SHong Zhang for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 660289bc588SBarry Smith 661289bc588SBarry Smith /* load in initial (unfactored row) */ 6629f7d0b68SHong Zhang nz = ai[r[i] + 1] - ai[r[i]]; 6639f7d0b68SHong Zhang ajtmp = aj + ai[r[i]]; 6649f7d0b68SHong Zhang v = aa + ai[r[i]]; 665ad540459SPierre Jolivet for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 666d2276718SHong Zhang rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 667289bc588SBarry Smith 668b3bf805bSHong Zhang row = *bjtmp++; 669f2582111SSatish Balay while (row < i) { 6708c37ef55SBarry Smith pc = rtmp + row; 6718c37ef55SBarry Smith if (*pc != 0.0) { 672010a20caSHong Zhang pv = b->a + diag_offset[row]; 673010a20caSHong Zhang pj = b->j + diag_offset[row] + 1; 67435aab85fSBarry Smith multiplier = *pc / *pv++; 6758c37ef55SBarry Smith *pc = multiplier; 676b3bf805bSHong Zhang nz = bi[row + 1] - diag_offset[row] - 1; 6779f7d0b68SHong Zhang for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 6789566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 679289bc588SBarry Smith } 680b3bf805bSHong Zhang row = *bjtmp++; 681289bc588SBarry Smith } 682416022c9SBarry Smith /* finished row so stick it into b->a */ 683b3bf805bSHong Zhang pv = b->a + bi[i]; 684b3bf805bSHong Zhang pj = b->j + bi[i]; 685b3bf805bSHong Zhang nz = bi[i + 1] - bi[i]; 686b3bf805bSHong Zhang diag = diag_offset[i] - bi[i]; 687e57ff13aSBarry Smith rs = 0.0; 688d3d32019SBarry Smith for (j = 0; j < nz; j++) { 6899f7d0b68SHong Zhang pv[j] = rtmp[pj[j]]; 6909f7d0b68SHong Zhang rs += PetscAbsScalar(pv[j]); 691d3d32019SBarry Smith } 692e57ff13aSBarry Smith rs -= PetscAbsScalar(pv[diag]); 693d2276718SHong Zhang 694d2276718SHong Zhang sctx.rs = rs; 695d2276718SHong Zhang sctx.pv = pv[diag]; 6969566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 69707b50cabSHong Zhang if (sctx.newshift) break; 698504a3fadSHong Zhang pv[diag] = sctx.pv; 69935aab85fSBarry Smith } 700d2276718SHong Zhang 70107b50cabSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 7026cc28720Svictorle /* 7036c037d1bSvictorle * if no shift in this attempt & shifting & started shifting & can refine, 7046cc28720Svictorle * then try lower shift 7056cc28720Svictorle */ 7060481f469SBarry Smith sctx.shift_hi = sctx.shift_fraction; 7070481f469SBarry Smith sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 7080481f469SBarry Smith sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 70907b50cabSHong Zhang sctx.newshift = PETSC_TRUE; 710d2276718SHong Zhang sctx.nshift++; 7116cc28720Svictorle } 71207b50cabSHong Zhang } while (sctx.newshift); 7130f11f9aeSSatish Balay 714a5b23f4aSJose E. Roman /* invert diagonal entries for simpler triangular solves */ 715ad540459SPierre Jolivet for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]]; 7169566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 7179566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 7189566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 719d3ac4fa3SBarry Smith 7209566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 7219566063dSJacob Faibussowitsch PetscCall(ISIdentity(isicol, &col_identity)); 7228d8c7c43SBarry Smith if (row_identity && col_identity) { 723ad04f41aSHong Zhang C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 7248d8c7c43SBarry Smith } else { 725ad04f41aSHong Zhang C->ops->solve = MatSolve_SeqAIJ_inplace; 726db4efbfdSBarry Smith } 727ad04f41aSHong Zhang C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 728ad04f41aSHong Zhang C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 729ad04f41aSHong Zhang C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 730ad04f41aSHong Zhang C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 7312205254eSKarl Rupp 732c456f294SBarry Smith C->assembled = PETSC_TRUE; 7335c9eb25fSBarry Smith C->preallocated = PETSC_TRUE; 7342205254eSKarl Rupp 7359566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->cmap->n)); 736d2276718SHong Zhang if (sctx.nshift) { 737f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 7389566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 739f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 7409566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 7416cc28720Svictorle } 7420697b6caSHong Zhang } 743d3ac4fa3SBarry Smith (C)->ops->solve = MatSolve_SeqAIJ_inplace; 744d3ac4fa3SBarry Smith (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 7452205254eSKarl Rupp 7469566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCheckInode(C)); 7473a40ed3dSBarry Smith PetscFunctionReturn(0); 748289bc588SBarry Smith } 7490889b5dcSvictorle 750137fb511SHong Zhang /* 751137fb511SHong Zhang This routine implements inplace ILU(0) with row or/and column permutations. 752137fb511SHong Zhang Input: 753137fb511SHong Zhang A - original matrix 754137fb511SHong Zhang Output; 755137fb511SHong Zhang A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 756137fb511SHong Zhang a->j (col index) is permuted by the inverse of colperm, then sorted 757137fb511SHong Zhang a->a reordered accordingly with a->j 758137fb511SHong Zhang a->diag (ptr to diagonal elements) is updated. 759137fb511SHong Zhang */ 7609371c9d4SSatish Balay PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info) { 761b051339dSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 762b051339dSHong Zhang IS isrow = a->row, isicol = a->icol; 7635d0c19d7SBarry Smith const PetscInt *r, *ic, *ics; 7645d0c19d7SBarry Smith PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j; 7655d0c19d7SBarry Smith PetscInt *ajtmp, nz, row; 766b051339dSHong Zhang PetscInt *diag = a->diag, nbdiag, *pj; 767a77337e4SBarry Smith PetscScalar *rtmp, *pc, multiplier, d; 768504a3fadSHong Zhang MatScalar *pv, *v; 769137fb511SHong Zhang PetscReal rs; 770d90ac83dSHong Zhang FactorShiftCtx sctx; 771504a3fadSHong Zhang const MatScalar *aa = a->a, *vtmp; 772137fb511SHong Zhang 773137fb511SHong Zhang PetscFunctionBegin; 77408401ef6SPierre Jolivet PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address"); 775504a3fadSHong Zhang 776504a3fadSHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 7779566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 778504a3fadSHong Zhang 779504a3fadSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 780504a3fadSHong Zhang const PetscInt *ddiag = a->diag; 781504a3fadSHong Zhang sctx.shift_top = info->zeropivot; 782504a3fadSHong Zhang for (i = 0; i < n; i++) { 783504a3fadSHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 784504a3fadSHong Zhang d = (aa)[ddiag[i]]; 785504a3fadSHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 786504a3fadSHong Zhang vtmp = aa + ai[i]; 787504a3fadSHong Zhang nz = ai[i + 1] - ai[i]; 7882205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]); 789504a3fadSHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 790504a3fadSHong Zhang } 791504a3fadSHong Zhang sctx.shift_top *= 1.1; 792504a3fadSHong Zhang sctx.nshift_max = 5; 793504a3fadSHong Zhang sctx.shift_lo = 0.; 794504a3fadSHong Zhang sctx.shift_hi = 1.; 795504a3fadSHong Zhang } 796504a3fadSHong Zhang 7979566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 7989566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 7999566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp)); 8009566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(rtmp, n + 1)); 801b051339dSHong Zhang ics = ic; 802137fb511SHong Zhang 803504a3fadSHong Zhang #if defined(MV) 80475567043SBarry Smith sctx.shift_top = 0.; 805e60cf9a0SBarry Smith sctx.nshift_max = 0; 80675567043SBarry Smith sctx.shift_lo = 0.; 80775567043SBarry Smith sctx.shift_hi = 0.; 80875567043SBarry Smith sctx.shift_fraction = 0.; 809137fb511SHong Zhang 810f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 81175567043SBarry Smith sctx.shift_top = 0.; 812137fb511SHong Zhang for (i = 0; i < n; i++) { 813137fb511SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 814b051339dSHong Zhang d = (a->a)[diag[i]]; 815137fb511SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 816b051339dSHong Zhang v = a->a + ai[i]; 817b051339dSHong Zhang nz = ai[i + 1] - ai[i]; 8182205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 819137fb511SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 820137fb511SHong Zhang } 821137fb511SHong Zhang if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 822137fb511SHong Zhang sctx.shift_top *= 1.1; 823137fb511SHong Zhang sctx.nshift_max = 5; 824137fb511SHong Zhang sctx.shift_lo = 0.; 825137fb511SHong Zhang sctx.shift_hi = 1.; 826137fb511SHong Zhang } 827137fb511SHong Zhang 82875567043SBarry Smith sctx.shift_amount = 0.; 829137fb511SHong Zhang sctx.nshift = 0; 830504a3fadSHong Zhang #endif 831504a3fadSHong Zhang 832137fb511SHong Zhang do { 83307b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 834137fb511SHong Zhang for (i = 0; i < n; i++) { 835b051339dSHong Zhang /* load in initial unfactored row */ 836b051339dSHong Zhang nz = ai[r[i] + 1] - ai[r[i]]; 837b051339dSHong Zhang ajtmp = aj + ai[r[i]]; 838b051339dSHong Zhang v = a->a + ai[r[i]]; 839137fb511SHong Zhang /* sort permuted ajtmp and values v accordingly */ 840b051339dSHong Zhang for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]]; 8419566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v)); 842137fb511SHong Zhang 843b051339dSHong Zhang diag[r[i]] = ai[r[i]]; 844137fb511SHong Zhang for (j = 0; j < nz; j++) { 845137fb511SHong Zhang rtmp[ajtmp[j]] = v[j]; 846b051339dSHong Zhang if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 847137fb511SHong Zhang } 848137fb511SHong Zhang rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 849137fb511SHong Zhang 850b051339dSHong Zhang row = *ajtmp++; 851137fb511SHong Zhang while (row < i) { 852137fb511SHong Zhang pc = rtmp + row; 853137fb511SHong Zhang if (*pc != 0.0) { 854b051339dSHong Zhang pv = a->a + diag[r[row]]; 855b051339dSHong Zhang pj = aj + diag[r[row]] + 1; 856137fb511SHong Zhang 857137fb511SHong Zhang multiplier = *pc / *pv++; 858137fb511SHong Zhang *pc = multiplier; 859b051339dSHong Zhang nz = ai[r[row] + 1] - diag[r[row]] - 1; 860b051339dSHong Zhang for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 8619566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 862137fb511SHong Zhang } 863b051339dSHong Zhang row = *ajtmp++; 864137fb511SHong Zhang } 865b051339dSHong Zhang /* finished row so overwrite it onto a->a */ 866b051339dSHong Zhang pv = a->a + ai[r[i]]; 867b051339dSHong Zhang pj = aj + ai[r[i]]; 868b051339dSHong Zhang nz = ai[r[i] + 1] - ai[r[i]]; 869b051339dSHong Zhang nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 870137fb511SHong Zhang 871137fb511SHong Zhang rs = 0.0; 872137fb511SHong Zhang for (j = 0; j < nz; j++) { 873b051339dSHong Zhang pv[j] = rtmp[pj[j]]; 874b051339dSHong Zhang if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 875137fb511SHong Zhang } 876137fb511SHong Zhang 877137fb511SHong Zhang sctx.rs = rs; 878b051339dSHong Zhang sctx.pv = pv[nbdiag]; 8799566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 88007b50cabSHong Zhang if (sctx.newshift) break; 881504a3fadSHong Zhang pv[nbdiag] = sctx.pv; 882137fb511SHong Zhang } 883137fb511SHong Zhang 88407b50cabSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 885137fb511SHong Zhang /* 886137fb511SHong Zhang * if no shift in this attempt & shifting & started shifting & can refine, 887137fb511SHong Zhang * then try lower shift 888137fb511SHong Zhang */ 8890481f469SBarry Smith sctx.shift_hi = sctx.shift_fraction; 8900481f469SBarry Smith sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 8910481f469SBarry Smith sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 89207b50cabSHong Zhang sctx.newshift = PETSC_TRUE; 893137fb511SHong Zhang sctx.nshift++; 894137fb511SHong Zhang } 89507b50cabSHong Zhang } while (sctx.newshift); 896137fb511SHong Zhang 897a5b23f4aSJose E. Roman /* invert diagonal entries for simpler triangular solves */ 898ad540459SPierre Jolivet for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]]; 899137fb511SHong Zhang 9009566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 9019566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 9029566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 9032205254eSKarl Rupp 904b051339dSHong Zhang A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 905ad04f41aSHong Zhang A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 906ad04f41aSHong Zhang A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 907ad04f41aSHong Zhang A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 9082205254eSKarl Rupp 909b051339dSHong Zhang A->assembled = PETSC_TRUE; 9105c9eb25fSBarry Smith A->preallocated = PETSC_TRUE; 9112205254eSKarl Rupp 9129566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(A->cmap->n)); 913137fb511SHong Zhang if (sctx.nshift) { 914f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 9159566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 916f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 9179566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 918137fb511SHong Zhang } 919137fb511SHong Zhang } 920137fb511SHong Zhang PetscFunctionReturn(0); 921137fb511SHong Zhang } 922137fb511SHong Zhang 9230a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */ 9249371c9d4SSatish Balay PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info) { 925416022c9SBarry Smith Mat C; 926416022c9SBarry Smith 9273a40ed3dSBarry Smith PetscFunctionBegin; 9289566063dSJacob Faibussowitsch PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C)); 9299566063dSJacob Faibussowitsch PetscCall(MatLUFactorSymbolic(C, A, row, col, info)); 9309566063dSJacob Faibussowitsch PetscCall(MatLUFactorNumeric(C, A, info)); 9312205254eSKarl Rupp 932db4efbfdSBarry Smith A->ops->solve = C->ops->solve; 933db4efbfdSBarry Smith A->ops->solvetranspose = C->ops->solvetranspose; 9342205254eSKarl Rupp 9359566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(A, &C)); 9363a40ed3dSBarry Smith PetscFunctionReturn(0); 937da3a660dSBarry Smith } 9380a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */ 9391d098868SHong Zhang 9409371c9d4SSatish Balay PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) { 941416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 942416022c9SBarry Smith IS iscol = a->col, isrow = a->row; 9435d0c19d7SBarry Smith PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 9445d0c19d7SBarry Smith PetscInt nz; 9455d0c19d7SBarry Smith const PetscInt *rout, *cout, *r, *c; 946dd6ea824SBarry Smith PetscScalar *x, *tmp, *tmps, sum; 947d9fead3dSBarry Smith const PetscScalar *b; 948dd6ea824SBarry Smith const MatScalar *aa = a->a, *v; 9498c37ef55SBarry Smith 9503a40ed3dSBarry Smith PetscFunctionBegin; 9513a40ed3dSBarry Smith if (!n) PetscFunctionReturn(0); 95291bf9adeSBarry Smith 9539566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 9549566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 955416022c9SBarry Smith tmp = a->solve_work; 9568c37ef55SBarry Smith 9579371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 9589371c9d4SSatish Balay r = rout; 9599371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 9609371c9d4SSatish Balay c = cout + (n - 1); 9618c37ef55SBarry Smith 9628c37ef55SBarry Smith /* forward solve the lower triangular */ 9638c37ef55SBarry Smith tmp[0] = b[*r++]; 964010a20caSHong Zhang tmps = tmp; 9658c37ef55SBarry Smith for (i = 1; i < n; i++) { 966010a20caSHong Zhang v = aa + ai[i]; 967010a20caSHong Zhang vi = aj + ai[i]; 96853e7425aSBarry Smith nz = a->diag[i] - ai[i]; 96953e7425aSBarry Smith sum = b[*r++]; 970003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 9718c37ef55SBarry Smith tmp[i] = sum; 9728c37ef55SBarry Smith } 9738c37ef55SBarry Smith 9748c37ef55SBarry Smith /* backward solve the upper triangular */ 9758c37ef55SBarry Smith for (i = n - 1; i >= 0; i--) { 976010a20caSHong Zhang v = aa + a->diag[i] + 1; 977010a20caSHong Zhang vi = aj + a->diag[i] + 1; 978416022c9SBarry Smith nz = ai[i + 1] - a->diag[i] - 1; 9798c37ef55SBarry Smith sum = tmp[i]; 980003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 981010a20caSHong Zhang x[*c--] = tmp[i] = sum * aa[a->diag[i]]; 9828c37ef55SBarry Smith } 9838c37ef55SBarry Smith 9849566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 9859566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 9869566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 9879566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 9889566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 9893a40ed3dSBarry Smith PetscFunctionReturn(0); 9908c37ef55SBarry Smith } 991026e39d0SSatish Balay 9929371c9d4SSatish Balay PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X) { 9932bebee5dSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 9942bebee5dSHong Zhang IS iscol = a->col, isrow = a->row; 9955d0c19d7SBarry Smith PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 996910cf402Sprj- PetscInt nz, neq, ldb, ldx; 9975d0c19d7SBarry Smith const PetscInt *rout, *cout, *r, *c; 998910cf402Sprj- PetscScalar *x, *tmp = a->solve_work, *tmps, sum; 999910cf402Sprj- const PetscScalar *b, *aa = a->a, *v; 1000910cf402Sprj- PetscBool isdense; 10012bebee5dSHong Zhang 10022bebee5dSHong Zhang PetscFunctionBegin; 10032bebee5dSHong Zhang if (!n) PetscFunctionReturn(0); 10049566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 100528b400f6SJacob Faibussowitsch PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1006f9fb9879SHong Zhang if (X != B) { 10079566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 100828b400f6SJacob Faibussowitsch PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1009f9fb9879SHong Zhang } 10109566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &b)); 10119566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(B, &ldb)); 10129566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &x)); 10139566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(X, &ldx)); 10149371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 10159371c9d4SSatish Balay r = rout; 10169371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 10179371c9d4SSatish Balay c = cout; 1018a36b22ccSSatish Balay for (neq = 0; neq < B->cmap->n; neq++) { 10192bebee5dSHong Zhang /* forward solve the lower triangular */ 10202bebee5dSHong Zhang tmp[0] = b[r[0]]; 10212bebee5dSHong Zhang tmps = tmp; 10222bebee5dSHong Zhang for (i = 1; i < n; i++) { 10232bebee5dSHong Zhang v = aa + ai[i]; 10242bebee5dSHong Zhang vi = aj + ai[i]; 10252bebee5dSHong Zhang nz = a->diag[i] - ai[i]; 10262bebee5dSHong Zhang sum = b[r[i]]; 1027003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 10282bebee5dSHong Zhang tmp[i] = sum; 10292bebee5dSHong Zhang } 10302bebee5dSHong Zhang /* backward solve the upper triangular */ 10312bebee5dSHong Zhang for (i = n - 1; i >= 0; i--) { 10322bebee5dSHong Zhang v = aa + a->diag[i] + 1; 10332bebee5dSHong Zhang vi = aj + a->diag[i] + 1; 10342bebee5dSHong Zhang nz = ai[i + 1] - a->diag[i] - 1; 10352bebee5dSHong Zhang sum = tmp[i]; 1036003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 10372bebee5dSHong Zhang x[c[i]] = tmp[i] = sum * aa[a->diag[i]]; 10382bebee5dSHong Zhang } 1039910cf402Sprj- b += ldb; 1040910cf402Sprj- x += ldx; 10412bebee5dSHong Zhang } 10429566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 10439566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 10449566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &b)); 10459566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &x)); 10469566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 10472bebee5dSHong Zhang PetscFunctionReturn(0); 10482bebee5dSHong Zhang } 10492bebee5dSHong Zhang 10509371c9d4SSatish Balay PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X) { 10519bd0847aSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 10529bd0847aSShri Abhyankar IS iscol = a->col, isrow = a->row; 10539bd0847aSShri Abhyankar PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 1054910cf402Sprj- PetscInt nz, neq, ldb, ldx; 10559bd0847aSShri Abhyankar const PetscInt *rout, *cout, *r, *c; 1056910cf402Sprj- PetscScalar *x, *tmp = a->solve_work, sum; 1057910cf402Sprj- const PetscScalar *b, *aa = a->a, *v; 1058910cf402Sprj- PetscBool isdense; 10599bd0847aSShri Abhyankar 10609bd0847aSShri Abhyankar PetscFunctionBegin; 10619bd0847aSShri Abhyankar if (!n) PetscFunctionReturn(0); 10629566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 106328b400f6SJacob Faibussowitsch PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1064f9fb9879SHong Zhang if (X != B) { 10659566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 106628b400f6SJacob Faibussowitsch PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1067f9fb9879SHong Zhang } 10689566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &b)); 10699566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(B, &ldb)); 10709566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &x)); 10719566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(X, &ldx)); 10729371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 10739371c9d4SSatish Balay r = rout; 10749371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 10759371c9d4SSatish Balay c = cout; 10769bd0847aSShri Abhyankar for (neq = 0; neq < B->cmap->n; neq++) { 10779bd0847aSShri Abhyankar /* forward solve the lower triangular */ 10789bd0847aSShri Abhyankar tmp[0] = b[r[0]]; 10799bd0847aSShri Abhyankar v = aa; 10809bd0847aSShri Abhyankar vi = aj; 10819bd0847aSShri Abhyankar for (i = 1; i < n; i++) { 10829bd0847aSShri Abhyankar nz = ai[i + 1] - ai[i]; 10839bd0847aSShri Abhyankar sum = b[r[i]]; 10849bd0847aSShri Abhyankar PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 10859bd0847aSShri Abhyankar tmp[i] = sum; 10869371c9d4SSatish Balay v += nz; 10879371c9d4SSatish Balay vi += nz; 10889bd0847aSShri Abhyankar } 10899bd0847aSShri Abhyankar /* backward solve the upper triangular */ 10909bd0847aSShri Abhyankar for (i = n - 1; i >= 0; i--) { 10919bd0847aSShri Abhyankar v = aa + adiag[i + 1] + 1; 10929bd0847aSShri Abhyankar vi = aj + adiag[i + 1] + 1; 10939bd0847aSShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 10949bd0847aSShri Abhyankar sum = tmp[i]; 10959bd0847aSShri Abhyankar PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 10969bd0847aSShri Abhyankar x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 10979bd0847aSShri Abhyankar } 1098910cf402Sprj- b += ldb; 1099910cf402Sprj- x += ldx; 11009bd0847aSShri Abhyankar } 11019566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 11029566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 11039566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &b)); 11049566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &x)); 11059566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 11069bd0847aSShri Abhyankar PetscFunctionReturn(0); 11079bd0847aSShri Abhyankar } 11089bd0847aSShri Abhyankar 11099371c9d4SSatish Balay PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx) { 1110137fb511SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1111137fb511SHong Zhang IS iscol = a->col, isrow = a->row; 11125d0c19d7SBarry Smith const PetscInt *r, *c, *rout, *cout; 11135d0c19d7SBarry Smith PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 11145d0c19d7SBarry Smith PetscInt nz, row; 1115fdc842d1SBarry Smith PetscScalar *x, *tmp, *tmps, sum; 1116fdc842d1SBarry Smith const PetscScalar *b; 1117dd6ea824SBarry Smith const MatScalar *aa = a->a, *v; 1118137fb511SHong Zhang 1119137fb511SHong Zhang PetscFunctionBegin; 1120137fb511SHong Zhang if (!n) PetscFunctionReturn(0); 1121137fb511SHong Zhang 11229566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 11239566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 1124137fb511SHong Zhang tmp = a->solve_work; 1125137fb511SHong Zhang 11269371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 11279371c9d4SSatish Balay r = rout; 11289371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 11299371c9d4SSatish Balay c = cout + (n - 1); 1130137fb511SHong Zhang 1131137fb511SHong Zhang /* forward solve the lower triangular */ 1132137fb511SHong Zhang tmp[0] = b[*r++]; 1133137fb511SHong Zhang tmps = tmp; 1134137fb511SHong Zhang for (row = 1; row < n; row++) { 1135137fb511SHong Zhang i = rout[row]; /* permuted row */ 1136137fb511SHong Zhang v = aa + ai[i]; 1137137fb511SHong Zhang vi = aj + ai[i]; 1138137fb511SHong Zhang nz = a->diag[i] - ai[i]; 1139137fb511SHong Zhang sum = b[*r++]; 1140003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1141137fb511SHong Zhang tmp[row] = sum; 1142137fb511SHong Zhang } 1143137fb511SHong Zhang 1144137fb511SHong Zhang /* backward solve the upper triangular */ 1145137fb511SHong Zhang for (row = n - 1; row >= 0; row--) { 1146137fb511SHong Zhang i = rout[row]; /* permuted row */ 1147137fb511SHong Zhang v = aa + a->diag[i] + 1; 1148137fb511SHong Zhang vi = aj + a->diag[i] + 1; 1149137fb511SHong Zhang nz = ai[i + 1] - a->diag[i] - 1; 1150137fb511SHong Zhang sum = tmp[row]; 1151003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1152137fb511SHong Zhang x[*c--] = tmp[row] = sum * aa[a->diag[i]]; 1153137fb511SHong Zhang } 1154137fb511SHong Zhang 11559566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 11569566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 11579566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 11589566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 11599566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1160137fb511SHong Zhang PetscFunctionReturn(0); 1161137fb511SHong Zhang } 1162137fb511SHong Zhang 1163930ae53cSBarry Smith /* ----------------------------------------------------------- */ 1164c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 11659371c9d4SSatish Balay PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) { 1166930ae53cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1167d0f46423SBarry Smith PetscInt n = A->rmap->n; 1168b377110cSBarry Smith const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag; 1169356650c2SBarry Smith PetscScalar *x; 1170356650c2SBarry Smith const PetscScalar *b; 1171dd6ea824SBarry Smith const MatScalar *aa = a->a; 1172aa482453SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1173356650c2SBarry Smith PetscInt adiag_i, i, nz, ai_i; 1174b377110cSBarry Smith const PetscInt *vi; 1175dd6ea824SBarry Smith const MatScalar *v; 1176dd6ea824SBarry Smith PetscScalar sum; 1177d85afc42SSatish Balay #endif 1178930ae53cSBarry Smith 11793a40ed3dSBarry Smith PetscFunctionBegin; 11803a40ed3dSBarry Smith if (!n) PetscFunctionReturn(0); 1181930ae53cSBarry Smith 11829566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 11839566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 1184930ae53cSBarry Smith 1185aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 11861c4feecaSSatish Balay fortransolveaij_(&n, x, ai, aj, adiag, aa, b); 11871c4feecaSSatish Balay #else 1188930ae53cSBarry Smith /* forward solve the lower triangular */ 1189930ae53cSBarry Smith x[0] = b[0]; 1190930ae53cSBarry Smith for (i = 1; i < n; i++) { 1191930ae53cSBarry Smith ai_i = ai[i]; 1192930ae53cSBarry Smith v = aa + ai_i; 1193930ae53cSBarry Smith vi = aj + ai_i; 1194930ae53cSBarry Smith nz = adiag[i] - ai_i; 1195930ae53cSBarry Smith sum = b[i]; 1196003131ecSBarry Smith PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1197930ae53cSBarry Smith x[i] = sum; 1198930ae53cSBarry Smith } 1199930ae53cSBarry Smith 1200930ae53cSBarry Smith /* backward solve the upper triangular */ 1201930ae53cSBarry Smith for (i = n - 1; i >= 0; i--) { 1202930ae53cSBarry Smith adiag_i = adiag[i]; 1203d708749eSBarry Smith v = aa + adiag_i + 1; 1204d708749eSBarry Smith vi = aj + adiag_i + 1; 1205930ae53cSBarry Smith nz = ai[i + 1] - adiag_i - 1; 1206930ae53cSBarry Smith sum = x[i]; 1207003131ecSBarry Smith PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1208930ae53cSBarry Smith x[i] = sum * aa[adiag_i]; 1209930ae53cSBarry Smith } 12101c4feecaSSatish Balay #endif 12119566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 12129566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 12139566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 12143a40ed3dSBarry Smith PetscFunctionReturn(0); 1215930ae53cSBarry Smith } 1216930ae53cSBarry Smith 12179371c9d4SSatish Balay PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx) { 1218416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1219416022c9SBarry Smith IS iscol = a->col, isrow = a->row; 122004fbf559SBarry Smith PetscInt i, n = A->rmap->n, j; 12215d0c19d7SBarry Smith PetscInt nz; 122204fbf559SBarry Smith const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j; 122304fbf559SBarry Smith PetscScalar *x, *tmp, sum; 122404fbf559SBarry Smith const PetscScalar *b; 1225dd6ea824SBarry Smith const MatScalar *aa = a->a, *v; 1226da3a660dSBarry Smith 12273a40ed3dSBarry Smith PetscFunctionBegin; 12289566063dSJacob Faibussowitsch if (yy != xx) PetscCall(VecCopy(yy, xx)); 1229da3a660dSBarry Smith 12309566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 12319566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 1232416022c9SBarry Smith tmp = a->solve_work; 1233da3a660dSBarry Smith 12349371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 12359371c9d4SSatish Balay r = rout; 12369371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 12379371c9d4SSatish Balay c = cout + (n - 1); 1238da3a660dSBarry Smith 1239da3a660dSBarry Smith /* forward solve the lower triangular */ 1240da3a660dSBarry Smith tmp[0] = b[*r++]; 1241da3a660dSBarry Smith for (i = 1; i < n; i++) { 1242010a20caSHong Zhang v = aa + ai[i]; 1243010a20caSHong Zhang vi = aj + ai[i]; 1244416022c9SBarry Smith nz = a->diag[i] - ai[i]; 1245da3a660dSBarry Smith sum = b[*r++]; 124604fbf559SBarry Smith for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1247da3a660dSBarry Smith tmp[i] = sum; 1248da3a660dSBarry Smith } 1249da3a660dSBarry Smith 1250da3a660dSBarry Smith /* backward solve the upper triangular */ 1251da3a660dSBarry Smith for (i = n - 1; i >= 0; i--) { 1252010a20caSHong Zhang v = aa + a->diag[i] + 1; 1253010a20caSHong Zhang vi = aj + a->diag[i] + 1; 1254416022c9SBarry Smith nz = ai[i + 1] - a->diag[i] - 1; 1255da3a660dSBarry Smith sum = tmp[i]; 125604fbf559SBarry Smith for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1257010a20caSHong Zhang tmp[i] = sum * aa[a->diag[i]]; 1258da3a660dSBarry Smith x[*c--] += tmp[i]; 1259da3a660dSBarry Smith } 1260da3a660dSBarry Smith 12619566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 12629566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 12639566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 12649566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 12659566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz)); 12663a40ed3dSBarry Smith PetscFunctionReturn(0); 1267da3a660dSBarry Smith } 126804fbf559SBarry Smith 12699371c9d4SSatish Balay PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx) { 12703c0119dfSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 12713c0119dfSShri Abhyankar IS iscol = a->col, isrow = a->row; 12723c0119dfSShri Abhyankar PetscInt i, n = A->rmap->n, j; 12733c0119dfSShri Abhyankar PetscInt nz; 12743c0119dfSShri Abhyankar const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 12753c0119dfSShri Abhyankar PetscScalar *x, *tmp, sum; 12763c0119dfSShri Abhyankar const PetscScalar *b; 12773c0119dfSShri Abhyankar const MatScalar *aa = a->a, *v; 12783c0119dfSShri Abhyankar 12793c0119dfSShri Abhyankar PetscFunctionBegin; 12809566063dSJacob Faibussowitsch if (yy != xx) PetscCall(VecCopy(yy, xx)); 12813c0119dfSShri Abhyankar 12829566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 12839566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 12843c0119dfSShri Abhyankar tmp = a->solve_work; 12853c0119dfSShri Abhyankar 12869371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 12879371c9d4SSatish Balay r = rout; 12889371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 12899371c9d4SSatish Balay c = cout; 12903c0119dfSShri Abhyankar 12913c0119dfSShri Abhyankar /* forward solve the lower triangular */ 12923c0119dfSShri Abhyankar tmp[0] = b[r[0]]; 12933c0119dfSShri Abhyankar v = aa; 12943c0119dfSShri Abhyankar vi = aj; 12953c0119dfSShri Abhyankar for (i = 1; i < n; i++) { 12963c0119dfSShri Abhyankar nz = ai[i + 1] - ai[i]; 12973c0119dfSShri Abhyankar sum = b[r[i]]; 12983c0119dfSShri Abhyankar for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 12993c0119dfSShri Abhyankar tmp[i] = sum; 13002205254eSKarl Rupp v += nz; 13012205254eSKarl Rupp vi += nz; 13023c0119dfSShri Abhyankar } 13033c0119dfSShri Abhyankar 13043c0119dfSShri Abhyankar /* backward solve the upper triangular */ 13053c0119dfSShri Abhyankar v = aa + adiag[n - 1]; 13063c0119dfSShri Abhyankar vi = aj + adiag[n - 1]; 13073c0119dfSShri Abhyankar for (i = n - 1; i >= 0; i--) { 13083c0119dfSShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 13093c0119dfSShri Abhyankar sum = tmp[i]; 13103c0119dfSShri Abhyankar for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 13113c0119dfSShri Abhyankar tmp[i] = sum * v[nz]; 13123c0119dfSShri Abhyankar x[c[i]] += tmp[i]; 13139371c9d4SSatish Balay v += nz + 1; 13149371c9d4SSatish Balay vi += nz + 1; 13153c0119dfSShri Abhyankar } 13163c0119dfSShri Abhyankar 13179566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 13189566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 13199566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 13209566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 13219566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz)); 13223c0119dfSShri Abhyankar PetscFunctionReturn(0); 13233c0119dfSShri Abhyankar } 13243c0119dfSShri Abhyankar 13259371c9d4SSatish Balay PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) { 1326416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 13272235e667SBarry Smith IS iscol = a->col, isrow = a->row; 132804fbf559SBarry Smith const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 132904fbf559SBarry Smith PetscInt i, n = A->rmap->n, j; 133004fbf559SBarry Smith PetscInt nz; 133104fbf559SBarry Smith PetscScalar *x, *tmp, s1; 1332dd6ea824SBarry Smith const MatScalar *aa = a->a, *v; 133304fbf559SBarry Smith const PetscScalar *b; 1334da3a660dSBarry Smith 13353a40ed3dSBarry Smith PetscFunctionBegin; 13369566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 13379566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 1338416022c9SBarry Smith tmp = a->solve_work; 1339da3a660dSBarry Smith 13409371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 13419371c9d4SSatish Balay r = rout; 13429371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 13439371c9d4SSatish Balay c = cout; 1344da3a660dSBarry Smith 1345da3a660dSBarry Smith /* copy the b into temp work space according to permutation */ 13462235e667SBarry Smith for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1347da3a660dSBarry Smith 1348da3a660dSBarry Smith /* forward solve the U^T */ 1349da3a660dSBarry Smith for (i = 0; i < n; i++) { 1350010a20caSHong Zhang v = aa + diag[i]; 1351010a20caSHong Zhang vi = aj + diag[i] + 1; 1352f1af5d2fSBarry Smith nz = ai[i + 1] - diag[i] - 1; 1353c38d4ed2SBarry Smith s1 = tmp[i]; 1354ef66eb69SBarry Smith s1 *= (*v++); /* multiply by inverse of diagonal entry */ 135504fbf559SBarry Smith for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1356c38d4ed2SBarry Smith tmp[i] = s1; 1357da3a660dSBarry Smith } 1358da3a660dSBarry Smith 1359da3a660dSBarry Smith /* backward solve the L^T */ 1360da3a660dSBarry Smith for (i = n - 1; i >= 0; i--) { 1361010a20caSHong Zhang v = aa + diag[i] - 1; 1362010a20caSHong Zhang vi = aj + diag[i] - 1; 1363f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 1364f1af5d2fSBarry Smith s1 = tmp[i]; 136504fbf559SBarry Smith for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1366da3a660dSBarry Smith } 1367da3a660dSBarry Smith 1368da3a660dSBarry Smith /* copy tmp into x according to permutation */ 1369591294e4SBarry Smith for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1370da3a660dSBarry Smith 13719566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 13729566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 13739566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 13749566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 1375da3a660dSBarry Smith 13769566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 13773a40ed3dSBarry Smith PetscFunctionReturn(0); 1378da3a660dSBarry Smith } 1379da3a660dSBarry Smith 13809371c9d4SSatish Balay PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx) { 1381d1fa9404SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1382d1fa9404SShri Abhyankar IS iscol = a->col, isrow = a->row; 1383d1fa9404SShri Abhyankar const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1384d1fa9404SShri Abhyankar PetscInt i, n = A->rmap->n, j; 1385d1fa9404SShri Abhyankar PetscInt nz; 1386d1fa9404SShri Abhyankar PetscScalar *x, *tmp, s1; 1387d1fa9404SShri Abhyankar const MatScalar *aa = a->a, *v; 1388d1fa9404SShri Abhyankar const PetscScalar *b; 1389d1fa9404SShri Abhyankar 1390d1fa9404SShri Abhyankar PetscFunctionBegin; 13919566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 13929566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 1393d1fa9404SShri Abhyankar tmp = a->solve_work; 1394d1fa9404SShri Abhyankar 13959371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 13969371c9d4SSatish Balay r = rout; 13979371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 13989371c9d4SSatish Balay c = cout; 1399d1fa9404SShri Abhyankar 1400d1fa9404SShri Abhyankar /* copy the b into temp work space according to permutation */ 1401d1fa9404SShri Abhyankar for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1402d1fa9404SShri Abhyankar 1403d1fa9404SShri Abhyankar /* forward solve the U^T */ 1404d1fa9404SShri Abhyankar for (i = 0; i < n; i++) { 1405d1fa9404SShri Abhyankar v = aa + adiag[i + 1] + 1; 1406d1fa9404SShri Abhyankar vi = aj + adiag[i + 1] + 1; 1407d1fa9404SShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 1408d1fa9404SShri Abhyankar s1 = tmp[i]; 1409d1fa9404SShri Abhyankar s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1410d1fa9404SShri Abhyankar for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1411d1fa9404SShri Abhyankar tmp[i] = s1; 1412d1fa9404SShri Abhyankar } 1413d1fa9404SShri Abhyankar 1414d1fa9404SShri Abhyankar /* backward solve the L^T */ 1415d1fa9404SShri Abhyankar for (i = n - 1; i >= 0; i--) { 1416d1fa9404SShri Abhyankar v = aa + ai[i]; 1417d1fa9404SShri Abhyankar vi = aj + ai[i]; 1418d1fa9404SShri Abhyankar nz = ai[i + 1] - ai[i]; 1419d1fa9404SShri Abhyankar s1 = tmp[i]; 1420d1fa9404SShri Abhyankar for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1421d1fa9404SShri Abhyankar } 1422d1fa9404SShri Abhyankar 1423d1fa9404SShri Abhyankar /* copy tmp into x according to permutation */ 1424d1fa9404SShri Abhyankar for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1425d1fa9404SShri Abhyankar 14269566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 14279566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 14289566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 14299566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 1430d1fa9404SShri Abhyankar 14319566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1432d1fa9404SShri Abhyankar PetscFunctionReturn(0); 1433d1fa9404SShri Abhyankar } 1434d1fa9404SShri Abhyankar 14359371c9d4SSatish Balay PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx) { 1436416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 14372235e667SBarry Smith IS iscol = a->col, isrow = a->row; 143804fbf559SBarry Smith const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 143904fbf559SBarry Smith PetscInt i, n = A->rmap->n, j; 144004fbf559SBarry Smith PetscInt nz; 144104fbf559SBarry Smith PetscScalar *x, *tmp, s1; 1442dd6ea824SBarry Smith const MatScalar *aa = a->a, *v; 144304fbf559SBarry Smith const PetscScalar *b; 14446abc6512SBarry Smith 14453a40ed3dSBarry Smith PetscFunctionBegin; 14469566063dSJacob Faibussowitsch if (zz != xx) PetscCall(VecCopy(zz, xx)); 14479566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 14489566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 1449416022c9SBarry Smith tmp = a->solve_work; 14506abc6512SBarry Smith 14519371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 14529371c9d4SSatish Balay r = rout; 14539371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 14549371c9d4SSatish Balay c = cout; 14556abc6512SBarry Smith 14566abc6512SBarry Smith /* copy the b into temp work space according to permutation */ 14572235e667SBarry Smith for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 14586abc6512SBarry Smith 14596abc6512SBarry Smith /* forward solve the U^T */ 14606abc6512SBarry Smith for (i = 0; i < n; i++) { 1461010a20caSHong Zhang v = aa + diag[i]; 1462010a20caSHong Zhang vi = aj + diag[i] + 1; 1463f1af5d2fSBarry Smith nz = ai[i + 1] - diag[i] - 1; 146404fbf559SBarry Smith s1 = tmp[i]; 146504fbf559SBarry Smith s1 *= (*v++); /* multiply by inverse of diagonal entry */ 146604fbf559SBarry Smith for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 146704fbf559SBarry Smith tmp[i] = s1; 14686abc6512SBarry Smith } 14696abc6512SBarry Smith 14706abc6512SBarry Smith /* backward solve the L^T */ 14716abc6512SBarry Smith for (i = n - 1; i >= 0; i--) { 1472010a20caSHong Zhang v = aa + diag[i] - 1; 1473010a20caSHong Zhang vi = aj + diag[i] - 1; 1474f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 147504fbf559SBarry Smith s1 = tmp[i]; 147604fbf559SBarry Smith for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 14776abc6512SBarry Smith } 14786abc6512SBarry Smith 14796abc6512SBarry Smith /* copy tmp into x according to permutation */ 14806abc6512SBarry Smith for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 14816abc6512SBarry Smith 14829566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 14839566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 14849566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 14859566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 14866abc6512SBarry Smith 14879566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 14883a40ed3dSBarry Smith PetscFunctionReturn(0); 1489da3a660dSBarry Smith } 149004fbf559SBarry Smith 14919371c9d4SSatish Balay PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx) { 1492533e6011SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1493533e6011SShri Abhyankar IS iscol = a->col, isrow = a->row; 1494533e6011SShri Abhyankar const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1495533e6011SShri Abhyankar PetscInt i, n = A->rmap->n, j; 1496533e6011SShri Abhyankar PetscInt nz; 1497533e6011SShri Abhyankar PetscScalar *x, *tmp, s1; 1498533e6011SShri Abhyankar const MatScalar *aa = a->a, *v; 1499533e6011SShri Abhyankar const PetscScalar *b; 1500533e6011SShri Abhyankar 1501533e6011SShri Abhyankar PetscFunctionBegin; 15029566063dSJacob Faibussowitsch if (zz != xx) PetscCall(VecCopy(zz, xx)); 15039566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 15049566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 1505533e6011SShri Abhyankar tmp = a->solve_work; 1506533e6011SShri Abhyankar 15079371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 15089371c9d4SSatish Balay r = rout; 15099371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 15109371c9d4SSatish Balay c = cout; 1511533e6011SShri Abhyankar 1512533e6011SShri Abhyankar /* copy the b into temp work space according to permutation */ 1513533e6011SShri Abhyankar for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1514533e6011SShri Abhyankar 1515533e6011SShri Abhyankar /* forward solve the U^T */ 1516533e6011SShri Abhyankar for (i = 0; i < n; i++) { 1517533e6011SShri Abhyankar v = aa + adiag[i + 1] + 1; 1518533e6011SShri Abhyankar vi = aj + adiag[i + 1] + 1; 1519533e6011SShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 1520533e6011SShri Abhyankar s1 = tmp[i]; 1521533e6011SShri Abhyankar s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1522533e6011SShri Abhyankar for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1523533e6011SShri Abhyankar tmp[i] = s1; 1524533e6011SShri Abhyankar } 1525533e6011SShri Abhyankar 1526533e6011SShri Abhyankar /* backward solve the L^T */ 1527533e6011SShri Abhyankar for (i = n - 1; i >= 0; i--) { 1528533e6011SShri Abhyankar v = aa + ai[i]; 1529533e6011SShri Abhyankar vi = aj + ai[i]; 1530533e6011SShri Abhyankar nz = ai[i + 1] - ai[i]; 1531533e6011SShri Abhyankar s1 = tmp[i]; 1532c5e3b2a3SShri Abhyankar for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1533533e6011SShri Abhyankar } 1534533e6011SShri Abhyankar 1535533e6011SShri Abhyankar /* copy tmp into x according to permutation */ 1536533e6011SShri Abhyankar for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1537533e6011SShri Abhyankar 15389566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 15399566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 15409566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 15419566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 1542533e6011SShri Abhyankar 15439566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1544533e6011SShri Abhyankar PetscFunctionReturn(0); 1545533e6011SShri Abhyankar } 1546533e6011SShri Abhyankar 15479e25ed09SBarry Smith /* ----------------------------------------------------------------*/ 1548b47f5a65SHong Zhang 15498fc3a347SHong Zhang /* 15508a76ed4fSHong Zhang ilu() under revised new data structure. 1551813a1f2bSShri Abhyankar Factored arrays bj and ba are stored as 1552813a1f2bSShri Abhyankar L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1553813a1f2bSShri Abhyankar 1554813a1f2bSShri Abhyankar bi=fact->i is an array of size n+1, in which 1555baabb860SHong Zhang bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1556baabb860SHong Zhang bi[n]: points to L(n-1,n-1)+1 1557baabb860SHong Zhang 1558813a1f2bSShri Abhyankar bdiag=fact->diag is an array of size n+1,in which 1559baabb860SHong Zhang bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1560a24f213cSHong Zhang bdiag[n]: points to entry of U(n-1,0)-1 1561813a1f2bSShri Abhyankar 1562813a1f2bSShri Abhyankar U(i,:) contains bdiag[i] as its last entry, i.e., 1563813a1f2bSShri Abhyankar U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1564813a1f2bSShri Abhyankar */ 15659371c9d4SSatish Balay PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) { 1566813a1f2bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1567bbd65245SShri Abhyankar const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag; 1568bbd65245SShri Abhyankar PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag; 15691df811f5SHong Zhang IS isicol; 1570813a1f2bSShri Abhyankar 1571813a1f2bSShri Abhyankar PetscFunctionBegin; 15729566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 15739566063dSJacob Faibussowitsch PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE)); 1574813a1f2bSShri Abhyankar b = (Mat_SeqAIJ *)(fact)->data; 1575813a1f2bSShri Abhyankar 1576813a1f2bSShri Abhyankar /* allocate matrix arrays for new data structure */ 15779566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i)); 15782205254eSKarl Rupp 1579813a1f2bSShri Abhyankar b->singlemalloc = PETSC_TRUE; 1580*4dfa11a4SJacob Faibussowitsch if (!b->diag) { PetscCall(PetscMalloc1(n + 1, &b->diag)); } 1581813a1f2bSShri Abhyankar bdiag = b->diag; 1582813a1f2bSShri Abhyankar 158348a46eb9SPierre Jolivet if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n])); 1584813a1f2bSShri Abhyankar 1585813a1f2bSShri Abhyankar /* set bi and bj with new data structure */ 1586813a1f2bSShri Abhyankar bi = b->i; 1587813a1f2bSShri Abhyankar bj = b->j; 1588813a1f2bSShri Abhyankar 1589813a1f2bSShri Abhyankar /* L part */ 1590e60cf9a0SBarry Smith bi[0] = 0; 1591813a1f2bSShri Abhyankar for (i = 0; i < n; i++) { 1592813a1f2bSShri Abhyankar nz = adiag[i] - ai[i]; 1593813a1f2bSShri Abhyankar bi[i + 1] = bi[i] + nz; 1594813a1f2bSShri Abhyankar aj = a->j + ai[i]; 1595ad540459SPierre Jolivet for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1596813a1f2bSShri Abhyankar } 1597813a1f2bSShri Abhyankar 1598813a1f2bSShri Abhyankar /* U part */ 159962fbd6afSShri Abhyankar bdiag[n] = bi[n] - 1; 1600813a1f2bSShri Abhyankar for (i = n - 1; i >= 0; i--) { 1601813a1f2bSShri Abhyankar nz = ai[i + 1] - adiag[i] - 1; 1602813a1f2bSShri Abhyankar aj = a->j + adiag[i] + 1; 1603ad540459SPierre Jolivet for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1604813a1f2bSShri Abhyankar /* diag[i] */ 1605bbd65245SShri Abhyankar bj[k++] = i; 1606a24f213cSHong Zhang bdiag[i] = bdiag[i + 1] + nz + 1; 1607813a1f2bSShri Abhyankar } 16081df811f5SHong Zhang 1609d5f3da31SBarry Smith fact->factortype = MAT_FACTOR_ILU; 16101df811f5SHong Zhang fact->info.factor_mallocs = 0; 16111df811f5SHong Zhang fact->info.fill_ratio_given = info->fill; 16121df811f5SHong Zhang fact->info.fill_ratio_needed = 1.0; 161335233d99SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 16149566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 16151df811f5SHong Zhang 16161df811f5SHong Zhang b = (Mat_SeqAIJ *)(fact)->data; 16171df811f5SHong Zhang b->row = isrow; 16181df811f5SHong Zhang b->col = iscol; 16191df811f5SHong Zhang b->icol = isicol; 16209566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work)); 16219566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 16229566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 1623813a1f2bSShri Abhyankar PetscFunctionReturn(0); 1624813a1f2bSShri Abhyankar } 1625813a1f2bSShri Abhyankar 16269371c9d4SSatish Balay PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) { 1627813a1f2bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1628813a1f2bSShri Abhyankar IS isicol; 1629813a1f2bSShri Abhyankar const PetscInt *r, *ic; 16301df811f5SHong Zhang PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1631813a1f2bSShri Abhyankar PetscInt *bi, *cols, nnz, *cols_lvl; 1632813a1f2bSShri Abhyankar PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1633813a1f2bSShri Abhyankar PetscInt i, levels, diagonal_fill; 1634035f4963SHong Zhang PetscBool col_identity, row_identity, missing; 1635813a1f2bSShri Abhyankar PetscReal f; 16360298fd71SBarry Smith PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1637813a1f2bSShri Abhyankar PetscBT lnkbt; 1638813a1f2bSShri Abhyankar PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 16390298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 16400298fd71SBarry Smith PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1641813a1f2bSShri Abhyankar 1642813a1f2bSShri Abhyankar PetscFunctionBegin; 164308401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 16449566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 164528b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1646ad04f41aSHong Zhang 1647813a1f2bSShri Abhyankar levels = (PetscInt)info->levels; 16489566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 16499566063dSJacob Faibussowitsch PetscCall(ISIdentity(iscol, &col_identity)); 1650813a1f2bSShri Abhyankar if (!levels && row_identity && col_identity) { 1651813a1f2bSShri Abhyankar /* special case: ilu(0) with natural ordering */ 16529566063dSJacob Faibussowitsch PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info)); 1653ad540459SPierre Jolivet if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1654813a1f2bSShri Abhyankar PetscFunctionReturn(0); 1655813a1f2bSShri Abhyankar } 1656813a1f2bSShri Abhyankar 16579566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 16589566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 16599566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 1660813a1f2bSShri Abhyankar 16611bfa06eaSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 16629566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bi)); 16639566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 1664e60cf9a0SBarry Smith bi[0] = bdiag[0] = 0; 16659566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1666813a1f2bSShri Abhyankar 1667813a1f2bSShri Abhyankar /* create a linked list for storing column indices of the active row */ 1668813a1f2bSShri Abhyankar nlnk = n + 1; 16699566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1670813a1f2bSShri Abhyankar 1671813a1f2bSShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 16721df811f5SHong Zhang f = info->fill; 16731df811f5SHong Zhang diagonal_fill = (PetscInt)info->diagonal_fill; 16749566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1675813a1f2bSShri Abhyankar current_space = free_space; 16769566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1677813a1f2bSShri Abhyankar current_space_lvl = free_space_lvl; 1678813a1f2bSShri Abhyankar for (i = 0; i < n; i++) { 1679813a1f2bSShri Abhyankar nzi = 0; 1680813a1f2bSShri Abhyankar /* copy current row into linked list */ 1681813a1f2bSShri Abhyankar nnz = ai[r[i] + 1] - ai[r[i]]; 168228b400f6SJacob Faibussowitsch PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1683813a1f2bSShri Abhyankar cols = aj + ai[r[i]]; 1684813a1f2bSShri Abhyankar lnk[i] = -1; /* marker to indicate if diagonal exists */ 16859566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1686813a1f2bSShri Abhyankar nzi += nlnk; 1687813a1f2bSShri Abhyankar 1688813a1f2bSShri Abhyankar /* make sure diagonal entry is included */ 1689813a1f2bSShri Abhyankar if (diagonal_fill && lnk[i] == -1) { 1690813a1f2bSShri Abhyankar fm = n; 1691813a1f2bSShri Abhyankar while (lnk[fm] < i) fm = lnk[fm]; 1692813a1f2bSShri Abhyankar lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1693813a1f2bSShri Abhyankar lnk[fm] = i; 1694813a1f2bSShri Abhyankar lnk_lvl[i] = 0; 16959371c9d4SSatish Balay nzi++; 16969371c9d4SSatish Balay dcount++; 1697813a1f2bSShri Abhyankar } 1698813a1f2bSShri Abhyankar 1699813a1f2bSShri Abhyankar /* add pivot rows into the active row */ 1700813a1f2bSShri Abhyankar nzbd = 0; 1701813a1f2bSShri Abhyankar prow = lnk[n]; 1702813a1f2bSShri Abhyankar while (prow < i) { 1703813a1f2bSShri Abhyankar nnz = bdiag[prow]; 1704813a1f2bSShri Abhyankar cols = bj_ptr[prow] + nnz + 1; 1705813a1f2bSShri Abhyankar cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1706813a1f2bSShri Abhyankar nnz = bi[prow + 1] - bi[prow] - nnz - 1; 17079566063dSJacob Faibussowitsch PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1708813a1f2bSShri Abhyankar nzi += nlnk; 1709813a1f2bSShri Abhyankar prow = lnk[prow]; 1710813a1f2bSShri Abhyankar nzbd++; 1711813a1f2bSShri Abhyankar } 1712813a1f2bSShri Abhyankar bdiag[i] = nzbd; 1713813a1f2bSShri Abhyankar bi[i + 1] = bi[i] + nzi; 1714813a1f2bSShri Abhyankar /* if free space is not available, make more free space */ 1715813a1f2bSShri Abhyankar if (current_space->local_remaining < nzi) { 1716f91af8c7SBarry Smith nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */ 17179566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 17189566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1719813a1f2bSShri Abhyankar reallocs++; 1720813a1f2bSShri Abhyankar } 1721813a1f2bSShri Abhyankar 1722813a1f2bSShri Abhyankar /* copy data into free_space and free_space_lvl, then initialize lnk */ 17239566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 1724813a1f2bSShri Abhyankar bj_ptr[i] = current_space->array; 1725813a1f2bSShri Abhyankar bjlvl_ptr[i] = current_space_lvl->array; 1726813a1f2bSShri Abhyankar 1727813a1f2bSShri Abhyankar /* make sure the active row i has diagonal entry */ 172808401ef6SPierre Jolivet PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 1729813a1f2bSShri Abhyankar 1730813a1f2bSShri Abhyankar current_space->array += nzi; 1731813a1f2bSShri Abhyankar current_space->local_used += nzi; 1732813a1f2bSShri Abhyankar current_space->local_remaining -= nzi; 1733813a1f2bSShri Abhyankar current_space_lvl->array += nzi; 1734813a1f2bSShri Abhyankar current_space_lvl->local_used += nzi; 1735813a1f2bSShri Abhyankar current_space_lvl->local_remaining -= nzi; 1736813a1f2bSShri Abhyankar } 1737813a1f2bSShri Abhyankar 17389566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 17399566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 1740813a1f2bSShri Abhyankar /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 17419566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 17429566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 1743813a1f2bSShri Abhyankar 17449566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 17459566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 17469566063dSJacob Faibussowitsch PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 1747813a1f2bSShri Abhyankar 1748813a1f2bSShri Abhyankar #if defined(PETSC_USE_INFO) 1749813a1f2bSShri Abhyankar { 1750aef85c9fSShri Abhyankar PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 17519566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 17529566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 17539566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 17549566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 175548a46eb9SPierre Jolivet if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 1756813a1f2bSShri Abhyankar } 1757813a1f2bSShri Abhyankar #endif 1758813a1f2bSShri Abhyankar /* put together the new matrix */ 17599566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 1760813a1f2bSShri Abhyankar b = (Mat_SeqAIJ *)(fact)->data; 17612205254eSKarl Rupp 1762813a1f2bSShri Abhyankar b->free_a = PETSC_TRUE; 1763813a1f2bSShri Abhyankar b->free_ij = PETSC_TRUE; 1764813a1f2bSShri Abhyankar b->singlemalloc = PETSC_FALSE; 17652205254eSKarl Rupp 17669566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a)); 17672205254eSKarl Rupp 1768813a1f2bSShri Abhyankar b->j = bj; 1769813a1f2bSShri Abhyankar b->i = bi; 1770813a1f2bSShri Abhyankar b->diag = bdiag; 1771f4259b30SLisandro Dalcin b->ilen = NULL; 1772f4259b30SLisandro Dalcin b->imax = NULL; 1773813a1f2bSShri Abhyankar b->row = isrow; 1774813a1f2bSShri Abhyankar b->col = iscol; 17759566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 17769566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 1777813a1f2bSShri Abhyankar b->icol = isicol; 17782205254eSKarl Rupp 17799566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 1780813a1f2bSShri Abhyankar /* In b structure: Free imax, ilen, old a, old j. 1781813a1f2bSShri Abhyankar Allocate bdiag, solve_work, new a, new j */ 1782f268cba8SShri Abhyankar b->maxnz = b->nz = bdiag[0] + 1; 17832205254eSKarl Rupp 1784813a1f2bSShri Abhyankar (fact)->info.factor_mallocs = reallocs; 1785813a1f2bSShri Abhyankar (fact)->info.fill_ratio_given = f; 1786f268cba8SShri Abhyankar (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 178735233d99SShri Abhyankar (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1788ad540459SPierre Jolivet if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 17899566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1790813a1f2bSShri Abhyankar PetscFunctionReturn(0); 1791813a1f2bSShri Abhyankar } 1792813a1f2bSShri Abhyankar 17939371c9d4SSatish Balay PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) { 17946516239fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 17956516239fSHong Zhang IS isicol; 17966516239fSHong Zhang const PetscInt *r, *ic; 1797ece542f9SHong Zhang PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 17986516239fSHong Zhang PetscInt *bi, *cols, nnz, *cols_lvl; 17996516239fSHong Zhang PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 18006516239fSHong Zhang PetscInt i, levels, diagonal_fill; 1801ace3abfcSBarry Smith PetscBool col_identity, row_identity; 18026516239fSHong Zhang PetscReal f; 18030298fd71SBarry Smith PetscInt nlnk, *lnk, *lnk_lvl = NULL; 18046516239fSHong Zhang PetscBT lnkbt; 18056516239fSHong Zhang PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 18060298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 18070298fd71SBarry Smith PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1808ace3abfcSBarry Smith PetscBool missing; 18096516239fSHong Zhang 18106516239fSHong Zhang PetscFunctionBegin; 181108401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 18129566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 181328b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1814ece542f9SHong Zhang 18156516239fSHong Zhang f = info->fill; 18166516239fSHong Zhang levels = (PetscInt)info->levels; 18176516239fSHong Zhang diagonal_fill = (PetscInt)info->diagonal_fill; 18182205254eSKarl Rupp 18199566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 18206516239fSHong Zhang 18219566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 18229566063dSJacob Faibussowitsch PetscCall(ISIdentity(iscol, &col_identity)); 18236516239fSHong Zhang if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 18249566063dSJacob Faibussowitsch PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE)); 18252205254eSKarl Rupp 1826ad04f41aSHong Zhang (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1827ad540459SPierre Jolivet if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1828d5f3da31SBarry Smith fact->factortype = MAT_FACTOR_ILU; 1829719d5645SBarry Smith (fact)->info.factor_mallocs = 0; 1830719d5645SBarry Smith (fact)->info.fill_ratio_given = info->fill; 1831719d5645SBarry Smith (fact)->info.fill_ratio_needed = 1.0; 18322205254eSKarl Rupp 1833719d5645SBarry Smith b = (Mat_SeqAIJ *)(fact)->data; 183408480c60SBarry Smith b->row = isrow; 183508480c60SBarry Smith b->col = iscol; 183682bf6240SBarry Smith b->icol = isicol; 18379566063dSJacob Faibussowitsch PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work)); 18389566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 18399566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 18403a40ed3dSBarry Smith PetscFunctionReturn(0); 184108480c60SBarry Smith } 184208480c60SBarry Smith 18439566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 18449566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 18459e25ed09SBarry Smith 18461bfa06eaSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 18479566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bi)); 18489566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 1849a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 18506cf997b0SBarry Smith 18519566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 18525a8e39fbSHong Zhang 18535a8e39fbSHong Zhang /* create a linked list for storing column indices of the active row */ 18545a8e39fbSHong Zhang nlnk = n + 1; 18559566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 18565a8e39fbSHong Zhang 18575a8e39fbSHong Zhang /* initial FreeSpace size is f*(ai[n]+1) */ 18589566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 18595a8e39fbSHong Zhang current_space = free_space; 18609566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 18615a8e39fbSHong Zhang current_space_lvl = free_space_lvl; 18625a8e39fbSHong Zhang 18635a8e39fbSHong Zhang for (i = 0; i < n; i++) { 18645a8e39fbSHong Zhang nzi = 0; 18656cf997b0SBarry Smith /* copy current row into linked list */ 18665a8e39fbSHong Zhang nnz = ai[r[i] + 1] - ai[r[i]]; 186728b400f6SJacob Faibussowitsch PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 18685a8e39fbSHong Zhang cols = aj + ai[r[i]]; 18695a8e39fbSHong Zhang lnk[i] = -1; /* marker to indicate if diagonal exists */ 18709566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 18715a8e39fbSHong Zhang nzi += nlnk; 18726cf997b0SBarry Smith 18736cf997b0SBarry Smith /* make sure diagonal entry is included */ 18745a8e39fbSHong Zhang if (diagonal_fill && lnk[i] == -1) { 18756cf997b0SBarry Smith fm = n; 18765a8e39fbSHong Zhang while (lnk[fm] < i) fm = lnk[fm]; 18775a8e39fbSHong Zhang lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 18785a8e39fbSHong Zhang lnk[fm] = i; 18795a8e39fbSHong Zhang lnk_lvl[i] = 0; 18809371c9d4SSatish Balay nzi++; 18819371c9d4SSatish Balay dcount++; 18826cf997b0SBarry Smith } 18836cf997b0SBarry Smith 18845a8e39fbSHong Zhang /* add pivot rows into the active row */ 18855a8e39fbSHong Zhang nzbd = 0; 18865a8e39fbSHong Zhang prow = lnk[n]; 18875a8e39fbSHong Zhang while (prow < i) { 18885a8e39fbSHong Zhang nnz = bdiag[prow]; 18895a8e39fbSHong Zhang cols = bj_ptr[prow] + nnz + 1; 18905a8e39fbSHong Zhang cols_lvl = bjlvl_ptr[prow] + nnz + 1; 18915a8e39fbSHong Zhang nnz = bi[prow + 1] - bi[prow] - nnz - 1; 18929566063dSJacob Faibussowitsch PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 18935a8e39fbSHong Zhang nzi += nlnk; 18945a8e39fbSHong Zhang prow = lnk[prow]; 18955a8e39fbSHong Zhang nzbd++; 189656beaf04SBarry Smith } 18975a8e39fbSHong Zhang bdiag[i] = nzbd; 18985a8e39fbSHong Zhang bi[i + 1] = bi[i] + nzi; 1899ecf371e4SBarry Smith 19005a8e39fbSHong Zhang /* if free space is not available, make more free space */ 19015a8e39fbSHong Zhang if (current_space->local_remaining < nzi) { 1902f91af8c7SBarry Smith nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */ 19039566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 19049566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 19055a8e39fbSHong Zhang reallocs++; 19065a8e39fbSHong Zhang } 1907ecf371e4SBarry Smith 19085a8e39fbSHong Zhang /* copy data into free_space and free_space_lvl, then initialize lnk */ 19099566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 19105a8e39fbSHong Zhang bj_ptr[i] = current_space->array; 19115a8e39fbSHong Zhang bjlvl_ptr[i] = current_space_lvl->array; 19125a8e39fbSHong Zhang 19135a8e39fbSHong Zhang /* make sure the active row i has diagonal entry */ 191408401ef6SPierre Jolivet PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 19155a8e39fbSHong Zhang 19165a8e39fbSHong Zhang current_space->array += nzi; 19175a8e39fbSHong Zhang current_space->local_used += nzi; 19185a8e39fbSHong Zhang current_space->local_remaining -= nzi; 19195a8e39fbSHong Zhang current_space_lvl->array += nzi; 19205a8e39fbSHong Zhang current_space_lvl->local_used += nzi; 19215a8e39fbSHong Zhang current_space_lvl->local_remaining -= nzi; 19229e25ed09SBarry Smith } 19235a8e39fbSHong Zhang 19249566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 19259566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 19265a8e39fbSHong Zhang 19275a8e39fbSHong Zhang /* destroy list of free space and other temporary arrays */ 19289566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 19299566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */ 19309566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 19319566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 19329566063dSJacob Faibussowitsch PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 19339e25ed09SBarry Smith 19346cf91177SBarry Smith #if defined(PETSC_USE_INFO) 1935f64065bbSBarry Smith { 19365a8e39fbSHong Zhang PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 19379566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 19389566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 19399566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 19409566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 194148a46eb9SPierre Jolivet if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 1942f64065bbSBarry Smith } 194363ba0a88SBarry Smith #endif 1944bbb6d6a8SBarry Smith 19459e25ed09SBarry Smith /* put together the new matrix */ 19469566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 1947719d5645SBarry Smith b = (Mat_SeqAIJ *)(fact)->data; 19482205254eSKarl Rupp 1949e6b907acSBarry Smith b->free_a = PETSC_TRUE; 1950e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 19517c922b88SBarry Smith b->singlemalloc = PETSC_FALSE; 19522205254eSKarl Rupp 19539566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[n], &b->a)); 19545a8e39fbSHong Zhang b->j = bj; 19555a8e39fbSHong Zhang b->i = bi; 19565a8e39fbSHong Zhang for (i = 0; i < n; i++) bdiag[i] += bi[i]; 19575a8e39fbSHong Zhang b->diag = bdiag; 1958f4259b30SLisandro Dalcin b->ilen = NULL; 1959f4259b30SLisandro Dalcin b->imax = NULL; 1960416022c9SBarry Smith b->row = isrow; 1961416022c9SBarry Smith b->col = iscol; 19629566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 19639566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 196482bf6240SBarry Smith b->icol = isicol; 19659566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 1966416022c9SBarry Smith /* In b structure: Free imax, ilen, old a, old j. 19675a8e39fbSHong Zhang Allocate bdiag, solve_work, new a, new j */ 19685a8e39fbSHong Zhang b->maxnz = b->nz = bi[n]; 19692205254eSKarl Rupp 1970719d5645SBarry Smith (fact)->info.factor_mallocs = reallocs; 1971719d5645SBarry Smith (fact)->info.fill_ratio_given = f; 1972719d5645SBarry Smith (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 1973ad04f41aSHong Zhang (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1974ad540459SPierre Jolivet if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 19753a40ed3dSBarry Smith PetscFunctionReturn(0); 19769e25ed09SBarry Smith } 1977d5d45c9bSBarry Smith 19789371c9d4SSatish Balay PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) { 19795f44c854SHong Zhang Mat C = B; 19805f44c854SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 19815f44c854SHong Zhang Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 19825f44c854SHong Zhang IS ip = b->row, iip = b->icol; 19835f44c854SHong Zhang const PetscInt *rip, *riip; 19845f44c854SHong Zhang PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp; 19855f44c854SHong Zhang PetscInt *ai = a->i, *aj = a->j; 19865f44c854SHong Zhang PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz; 19875f44c854SHong Zhang MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 1988ace3abfcSBarry Smith PetscBool perm_identity; 1989d90ac83dSHong Zhang FactorShiftCtx sctx; 199098a93161SHong Zhang PetscReal rs; 199198a93161SHong Zhang MatScalar d, *v; 199298a93161SHong Zhang 19935f44c854SHong Zhang PetscFunctionBegin; 199498a93161SHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 19959566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 199698a93161SHong Zhang 1997f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 199898a93161SHong Zhang sctx.shift_top = info->zeropivot; 199998a93161SHong Zhang for (i = 0; i < mbs; i++) { 200098a93161SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 200198a93161SHong Zhang d = (aa)[a->diag[i]]; 200298a93161SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 200398a93161SHong Zhang v = aa + ai[i]; 200498a93161SHong Zhang nz = ai[i + 1] - ai[i]; 20052205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 200698a93161SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 200798a93161SHong Zhang } 200898a93161SHong Zhang sctx.shift_top *= 1.1; 200998a93161SHong Zhang sctx.nshift_max = 5; 201098a93161SHong Zhang sctx.shift_lo = 0.; 201198a93161SHong Zhang sctx.shift_hi = 1.; 201298a93161SHong Zhang } 20135f44c854SHong Zhang 20149566063dSJacob Faibussowitsch PetscCall(ISGetIndices(ip, &rip)); 20159566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iip, &riip)); 20165f44c854SHong Zhang 20175f44c854SHong Zhang /* allocate working arrays 20185f44c854SHong Zhang c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 20195f44c854SHong Zhang il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 20205f44c854SHong Zhang */ 20219566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r)); 20225f44c854SHong Zhang 202398a93161SHong Zhang do { 202407b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 202598a93161SHong Zhang 2026c5546cabSHong Zhang for (i = 0; i < mbs; i++) c2r[i] = mbs; 20272e987568SSatish Balay if (mbs) il[0] = 0; 20285f44c854SHong Zhang 20295f44c854SHong Zhang for (k = 0; k < mbs; k++) { 20305f44c854SHong Zhang /* zero rtmp */ 20315f44c854SHong Zhang nz = bi[k + 1] - bi[k]; 20325f44c854SHong Zhang bjtmp = bj + bi[k]; 20335f44c854SHong Zhang for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 20345f44c854SHong Zhang 20355f44c854SHong Zhang /* load in initial unfactored row */ 20365f44c854SHong Zhang bval = ba + bi[k]; 20379371c9d4SSatish Balay jmin = ai[rip[k]]; 20389371c9d4SSatish Balay jmax = ai[rip[k] + 1]; 20395f44c854SHong Zhang for (j = jmin; j < jmax; j++) { 20405f44c854SHong Zhang col = riip[aj[j]]; 20415f44c854SHong Zhang if (col >= k) { /* only take upper triangular entry */ 20425f44c854SHong Zhang rtmp[col] = aa[j]; 20435f44c854SHong Zhang *bval++ = 0.0; /* for in-place factorization */ 20445f44c854SHong Zhang } 20455f44c854SHong Zhang } 204698a93161SHong Zhang /* shift the diagonal of the matrix: ZeropivotApply() */ 204798a93161SHong Zhang rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 20485f44c854SHong Zhang 20495f44c854SHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 20505f44c854SHong Zhang dk = rtmp[k]; 20515f44c854SHong Zhang i = c2r[k]; /* first row to be added to k_th row */ 20525f44c854SHong Zhang 20535f44c854SHong Zhang while (i < k) { 20545f44c854SHong Zhang nexti = c2r[i]; /* next row to be added to k_th row */ 20555f44c854SHong Zhang 20565f44c854SHong Zhang /* compute multiplier, update diag(k) and U(i,k) */ 20575f44c854SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 20585f44c854SHong Zhang uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */ 20595f44c854SHong Zhang dk += uikdi * ba[ili]; /* update diag[k] */ 20605f44c854SHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 20615f44c854SHong Zhang 20625f44c854SHong Zhang /* add multiple of row i to k-th row */ 20639371c9d4SSatish Balay jmin = ili + 1; 20649371c9d4SSatish Balay jmax = bi[i + 1]; 20655f44c854SHong Zhang if (jmin < jmax) { 20665f44c854SHong Zhang for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 20675f44c854SHong Zhang /* update il and c2r for row i */ 20685f44c854SHong Zhang il[i] = jmin; 20699371c9d4SSatish Balay j = bj[jmin]; 20709371c9d4SSatish Balay c2r[i] = c2r[j]; 20719371c9d4SSatish Balay c2r[j] = i; 20725f44c854SHong Zhang } 20735f44c854SHong Zhang i = nexti; 20745f44c854SHong Zhang } 20755f44c854SHong Zhang 20765f44c854SHong Zhang /* copy data into U(k,:) */ 207798a93161SHong Zhang rs = 0.0; 20789371c9d4SSatish Balay jmin = bi[k]; 20799371c9d4SSatish Balay jmax = bi[k + 1] - 1; 20805f44c854SHong Zhang if (jmin < jmax) { 20815f44c854SHong Zhang for (j = jmin; j < jmax; j++) { 20829371c9d4SSatish Balay col = bj[j]; 20839371c9d4SSatish Balay ba[j] = rtmp[col]; 20849371c9d4SSatish Balay rs += PetscAbsScalar(ba[j]); 20855f44c854SHong Zhang } 20865f44c854SHong Zhang /* add the k-th row into il and c2r */ 20875f44c854SHong Zhang il[k] = jmin; 20889371c9d4SSatish Balay i = bj[jmin]; 20899371c9d4SSatish Balay c2r[k] = c2r[i]; 20909371c9d4SSatish Balay c2r[i] = k; 20915f44c854SHong Zhang } 209298a93161SHong Zhang 209398a93161SHong Zhang /* MatPivotCheck() */ 209498a93161SHong Zhang sctx.rs = rs; 209598a93161SHong Zhang sctx.pv = dk; 20969566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 209707b50cabSHong Zhang if (sctx.newshift) break; 209898a93161SHong Zhang dk = sctx.pv; 209998a93161SHong Zhang 210098a93161SHong Zhang ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */ 210198a93161SHong Zhang } 210207b50cabSHong Zhang } while (sctx.newshift); 21035f44c854SHong Zhang 21049566063dSJacob Faibussowitsch PetscCall(PetscFree3(rtmp, il, c2r)); 21059566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(ip, &rip)); 21069566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iip, &riip)); 21075f44c854SHong Zhang 21089566063dSJacob Faibussowitsch PetscCall(ISIdentity(ip, &perm_identity)); 21095f44c854SHong Zhang if (perm_identity) { 211035233d99SShri Abhyankar B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 211135233d99SShri Abhyankar B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 21122169352eSHong Zhang B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 21132169352eSHong Zhang B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 21145f44c854SHong Zhang } else { 211535233d99SShri Abhyankar B->ops->solve = MatSolve_SeqSBAIJ_1; 211635233d99SShri Abhyankar B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 211735233d99SShri Abhyankar B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 211835233d99SShri Abhyankar B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 21195f44c854SHong Zhang } 21205f44c854SHong Zhang 21215f44c854SHong Zhang C->assembled = PETSC_TRUE; 21225f44c854SHong Zhang C->preallocated = PETSC_TRUE; 21232205254eSKarl Rupp 21249566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->rmap->n)); 212598a93161SHong Zhang 212698a93161SHong Zhang /* MatPivotView() */ 212798a93161SHong Zhang if (sctx.nshift) { 2128f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 21299566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 2130f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 21319566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2132f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 21339566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 213498a93161SHong Zhang } 213598a93161SHong Zhang } 21365f44c854SHong Zhang PetscFunctionReturn(0); 21375f44c854SHong Zhang } 21385f44c854SHong Zhang 21399371c9d4SSatish Balay PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) { 2140719d5645SBarry Smith Mat C = B; 21410968510aSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 21422ed133dbSHong Zhang Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 21439bfd6278SHong Zhang IS ip = b->row, iip = b->icol; 21445d0c19d7SBarry Smith const PetscInt *rip, *riip; 2145c5546cabSHong Zhang PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp; 21462ed133dbSHong Zhang PetscInt *ai = a->i, *aj = a->j; 21472ed133dbSHong Zhang PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 2148622e2028SHong Zhang MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 2149ace3abfcSBarry Smith PetscBool perm_identity; 21500e95ead3SHong Zhang FactorShiftCtx sctx; 21510e95ead3SHong Zhang PetscReal rs; 21520e95ead3SHong Zhang MatScalar d, *v; 2153d5d45c9bSBarry Smith 2154a6175056SHong Zhang PetscFunctionBegin; 21550e95ead3SHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 21569566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 21570e95ead3SHong Zhang 21580e95ead3SHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 21590e95ead3SHong Zhang sctx.shift_top = info->zeropivot; 21600e95ead3SHong Zhang for (i = 0; i < mbs; i++) { 21610e95ead3SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 21620e95ead3SHong Zhang d = (aa)[a->diag[i]]; 21630e95ead3SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 21640e95ead3SHong Zhang v = aa + ai[i]; 21650e95ead3SHong Zhang nz = ai[i + 1] - ai[i]; 21662205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 21670e95ead3SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 21680e95ead3SHong Zhang } 21690e95ead3SHong Zhang sctx.shift_top *= 1.1; 21700e95ead3SHong Zhang sctx.nshift_max = 5; 21710e95ead3SHong Zhang sctx.shift_lo = 0.; 21720e95ead3SHong Zhang sctx.shift_hi = 1.; 21730e95ead3SHong Zhang } 2174ee45ca4aSHong Zhang 21759566063dSJacob Faibussowitsch PetscCall(ISGetIndices(ip, &rip)); 21769566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iip, &riip)); 21772ed133dbSHong Zhang 21782ed133dbSHong Zhang /* initialization */ 21799566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 21800e95ead3SHong Zhang 21812ed133dbSHong Zhang do { 218207b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 21830e95ead3SHong Zhang 2184c5546cabSHong Zhang for (i = 0; i < mbs; i++) jl[i] = mbs; 2185c5546cabSHong Zhang il[0] = 0; 21862ed133dbSHong Zhang 21872ed133dbSHong Zhang for (k = 0; k < mbs; k++) { 2188c5546cabSHong Zhang /* zero rtmp */ 2189c5546cabSHong Zhang nz = bi[k + 1] - bi[k]; 2190c5546cabSHong Zhang bjtmp = bj + bi[k]; 2191c5546cabSHong Zhang for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 2192c5546cabSHong Zhang 2193c05c3958SHong Zhang bval = ba + bi[k]; 21942ed133dbSHong Zhang /* initialize k-th row by the perm[k]-th row of A */ 21959371c9d4SSatish Balay jmin = ai[rip[k]]; 21969371c9d4SSatish Balay jmax = ai[rip[k] + 1]; 21972ed133dbSHong Zhang for (j = jmin; j < jmax; j++) { 21989bfd6278SHong Zhang col = riip[aj[j]]; 21992ed133dbSHong Zhang if (col >= k) { /* only take upper triangular entry */ 22002ed133dbSHong Zhang rtmp[col] = aa[j]; 2201c05c3958SHong Zhang *bval++ = 0.0; /* for in-place factorization */ 22022ed133dbSHong Zhang } 22032ed133dbSHong Zhang } 220439e3d630SHong Zhang /* shift the diagonal of the matrix */ 2205540703acSHong Zhang if (sctx.nshift) rtmp[k] += sctx.shift_amount; 22062ed133dbSHong Zhang 22072ed133dbSHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 22082ed133dbSHong Zhang dk = rtmp[k]; 22092ed133dbSHong Zhang i = jl[k]; /* first row to be added to k_th row */ 22102ed133dbSHong Zhang 22112ed133dbSHong Zhang while (i < k) { 22122ed133dbSHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 22132ed133dbSHong Zhang 22142ed133dbSHong Zhang /* compute multiplier, update diag(k) and U(i,k) */ 22152ed133dbSHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 22162ed133dbSHong Zhang uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 22172ed133dbSHong Zhang dk += uikdi * ba[ili]; 22182ed133dbSHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 22192ed133dbSHong Zhang 22202ed133dbSHong Zhang /* add multiple of row i to k-th row */ 22219371c9d4SSatish Balay jmin = ili + 1; 22229371c9d4SSatish Balay jmax = bi[i + 1]; 22232ed133dbSHong Zhang if (jmin < jmax) { 22242ed133dbSHong Zhang for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 22252ed133dbSHong Zhang /* update il and jl for row i */ 22262ed133dbSHong Zhang il[i] = jmin; 22279371c9d4SSatish Balay j = bj[jmin]; 22289371c9d4SSatish Balay jl[i] = jl[j]; 22299371c9d4SSatish Balay jl[j] = i; 22302ed133dbSHong Zhang } 22312ed133dbSHong Zhang i = nexti; 22322ed133dbSHong Zhang } 22332ed133dbSHong Zhang 22340697b6caSHong Zhang /* shift the diagonals when zero pivot is detected */ 22350697b6caSHong Zhang /* compute rs=sum of abs(off-diagonal) */ 22360697b6caSHong Zhang rs = 0.0; 22373c9e8598SHong Zhang jmin = bi[k] + 1; 22383c9e8598SHong Zhang nz = bi[k + 1] - jmin; 22393c9e8598SHong Zhang bcol = bj + jmin; 2240ad540459SPierre Jolivet for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]); 2241540703acSHong Zhang 2242540703acSHong Zhang sctx.rs = rs; 2243540703acSHong Zhang sctx.pv = dk; 22449566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, k)); 224507b50cabSHong Zhang if (sctx.newshift) break; 22460e95ead3SHong Zhang dk = sctx.pv; 22473c9e8598SHong Zhang 22483c9e8598SHong Zhang /* copy data into U(k,:) */ 224939e3d630SHong Zhang ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 22509371c9d4SSatish Balay jmin = bi[k] + 1; 22519371c9d4SSatish Balay jmax = bi[k + 1]; 225239e3d630SHong Zhang if (jmin < jmax) { 225339e3d630SHong Zhang for (j = jmin; j < jmax; j++) { 22549371c9d4SSatish Balay col = bj[j]; 22559371c9d4SSatish Balay ba[j] = rtmp[col]; 22563c9e8598SHong Zhang } 225739e3d630SHong Zhang /* add the k-th row into il and jl */ 22583c9e8598SHong Zhang il[k] = jmin; 22599371c9d4SSatish Balay i = bj[jmin]; 22609371c9d4SSatish Balay jl[k] = jl[i]; 22619371c9d4SSatish Balay jl[i] = k; 22623c9e8598SHong Zhang } 22633c9e8598SHong Zhang } 226407b50cabSHong Zhang } while (sctx.newshift); 22650e95ead3SHong Zhang 22669566063dSJacob Faibussowitsch PetscCall(PetscFree3(rtmp, il, jl)); 22679566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(ip, &rip)); 22689566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iip, &riip)); 2269db4efbfdSBarry Smith 22709566063dSJacob Faibussowitsch PetscCall(ISIdentity(ip, &perm_identity)); 2271db4efbfdSBarry Smith if (perm_identity) { 22720a3c351aSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 22730a3c351aSHong Zhang B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 22740a3c351aSHong Zhang B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 22750a3c351aSHong Zhang B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2276db4efbfdSBarry Smith } else { 22770a3c351aSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 22780a3c351aSHong Zhang B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 22790a3c351aSHong Zhang B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 22800a3c351aSHong Zhang B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2281db4efbfdSBarry Smith } 2282db4efbfdSBarry Smith 22833c9e8598SHong Zhang C->assembled = PETSC_TRUE; 22843c9e8598SHong Zhang C->preallocated = PETSC_TRUE; 22852205254eSKarl Rupp 22869566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->rmap->n)); 2287540703acSHong Zhang if (sctx.nshift) { 2288f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 22899566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2290f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 22919566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 22920697b6caSHong Zhang } 22933c9e8598SHong Zhang } 22943c9e8598SHong Zhang PetscFunctionReturn(0); 22953c9e8598SHong Zhang } 2296a6175056SHong Zhang 22978a76ed4fSHong Zhang /* 22988a76ed4fSHong Zhang icc() under revised new data structure. 22998a76ed4fSHong Zhang Factored arrays bj and ba are stored as 23008a76ed4fSHong Zhang U(0,:),...,U(i,:),U(n-1,:) 23018a76ed4fSHong Zhang 23028a76ed4fSHong Zhang ui=fact->i is an array of size n+1, in which 23038a76ed4fSHong Zhang ui+ 23048a76ed4fSHong Zhang ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 23058a76ed4fSHong Zhang ui[n]: points to U(n-1,n-1)+1 23068a76ed4fSHong Zhang 23078a76ed4fSHong Zhang udiag=fact->diag is an array of size n,in which 23088a76ed4fSHong Zhang udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 23098a76ed4fSHong Zhang 23108a76ed4fSHong Zhang U(i,:) contains udiag[i] as its last entry, i.e., 23118a76ed4fSHong Zhang U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 23128a76ed4fSHong Zhang */ 23138a76ed4fSHong Zhang 23149371c9d4SSatish Balay PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) { 23150968510aSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2316ed59401aSHong Zhang Mat_SeqSBAIJ *b; 2317ace3abfcSBarry Smith PetscBool perm_identity, missing; 23185f44c854SHong Zhang PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 23195d0c19d7SBarry Smith const PetscInt *rip, *riip; 2320ed59401aSHong Zhang PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 23210298fd71SBarry Smith PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 23225a8e39fbSHong Zhang PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2323ed59401aSHong Zhang PetscReal fill = info->fill, levels = info->levels; 23240298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 23250298fd71SBarry Smith PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 23267a48dd6fSHong Zhang PetscBT lnkbt; 2327b635d36bSHong Zhang IS iperm; 2328a6175056SHong Zhang 2329a6175056SHong Zhang PetscFunctionBegin; 233008401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 23319566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &d)); 233228b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 23339566063dSJacob Faibussowitsch PetscCall(ISIdentity(perm, &perm_identity)); 23349566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 23357a48dd6fSHong Zhang 23369566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ui)); 23379566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &udiag)); 233839e3d630SHong Zhang ui[0] = 0; 233939e3d630SHong Zhang 23408a76ed4fSHong Zhang /* ICC(0) without matrix ordering: simply rearrange column indices */ 2341622e2028SHong Zhang if (!levels && perm_identity) { 23425f44c854SHong Zhang for (i = 0; i < am; i++) { 2343c5546cabSHong Zhang ncols = ai[i + 1] - a->diag[i]; 2344c5546cabSHong Zhang ui[i + 1] = ui[i] + ncols; 2345c5546cabSHong Zhang udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */ 2346dc1e2a3fSHong Zhang } 23479566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2348dc1e2a3fSHong Zhang cols = uj; 2349dc1e2a3fSHong Zhang for (i = 0; i < am; i++) { 23505f44c854SHong Zhang aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2351dc1e2a3fSHong Zhang ncols = ai[i + 1] - a->diag[i] - 1; 23525f44c854SHong Zhang for (j = 0; j < ncols; j++) *cols++ = aj[j]; 2353910cf402Sprj- *cols++ = i; /* diagonal is located as the last entry of U(i,:) */ 23545f44c854SHong Zhang } 23555f44c854SHong Zhang } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 23569566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iperm, &riip)); 23579566063dSJacob Faibussowitsch PetscCall(ISGetIndices(perm, &rip)); 23585f44c854SHong Zhang 23595f44c854SHong Zhang /* initialization */ 23609566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ajtmp)); 23615f44c854SHong Zhang 23625f44c854SHong Zhang /* jl: linked list for storing indices of the pivot rows 23635f44c854SHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 23649566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 23655f44c854SHong Zhang for (i = 0; i < am; i++) { 23669371c9d4SSatish Balay jl[i] = am; 23679371c9d4SSatish Balay il[i] = 0; 23685f44c854SHong Zhang } 23695f44c854SHong Zhang 23705f44c854SHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 23715f44c854SHong Zhang nlnk = am + 1; 23729566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 23735f44c854SHong Zhang 237495b5ac22SHong Zhang /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 23759566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 23765f44c854SHong Zhang current_space = free_space; 23779566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl)); 23785f44c854SHong Zhang current_space_lvl = free_space_lvl; 23795f44c854SHong Zhang 23805f44c854SHong Zhang for (k = 0; k < am; k++) { /* for each active row k */ 23815f44c854SHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 23825f44c854SHong Zhang nzk = 0; 23835f44c854SHong Zhang ncols = ai[rip[k] + 1] - ai[rip[k]]; 238428b400f6SJacob Faibussowitsch PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 23855f44c854SHong Zhang ncols_upper = 0; 23865f44c854SHong Zhang for (j = 0; j < ncols; j++) { 23875f44c854SHong Zhang i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 23885f44c854SHong Zhang if (riip[i] >= k) { /* only take upper triangular entry */ 23895f44c854SHong Zhang ajtmp[ncols_upper] = i; 23905f44c854SHong Zhang ncols_upper++; 23915f44c854SHong Zhang } 23925f44c854SHong Zhang } 23939566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 23945f44c854SHong Zhang nzk += nlnk; 23955f44c854SHong Zhang 23965f44c854SHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 23975f44c854SHong Zhang prow = jl[k]; /* 1st pivot row */ 23985f44c854SHong Zhang 23995f44c854SHong Zhang while (prow < k) { 24005f44c854SHong Zhang nextprow = jl[prow]; 24015f44c854SHong Zhang 24025f44c854SHong Zhang /* merge prow into k-th row */ 24035f44c854SHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 24045f44c854SHong Zhang jmax = ui[prow + 1]; 24055f44c854SHong Zhang ncols = jmax - jmin; 24065f44c854SHong Zhang i = jmin - ui[prow]; 24075f44c854SHong Zhang cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 24085f44c854SHong Zhang uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 24095f44c854SHong Zhang j = *(uj - 1); 24109566063dSJacob Faibussowitsch PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 24115f44c854SHong Zhang nzk += nlnk; 24125f44c854SHong Zhang 24135f44c854SHong Zhang /* update il and jl for prow */ 24145f44c854SHong Zhang if (jmin < jmax) { 24155f44c854SHong Zhang il[prow] = jmin; 24169371c9d4SSatish Balay j = *cols; 24179371c9d4SSatish Balay jl[prow] = jl[j]; 24189371c9d4SSatish Balay jl[j] = prow; 24195f44c854SHong Zhang } 24205f44c854SHong Zhang prow = nextprow; 24215f44c854SHong Zhang } 24225f44c854SHong Zhang 24235f44c854SHong Zhang /* if free space is not available, make more free space */ 24245f44c854SHong Zhang if (current_space->local_remaining < nzk) { 24255f44c854SHong Zhang i = am - k + 1; /* num of unfactored rows */ 2426f91af8c7SBarry Smith i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 24279566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 24289566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 24295f44c854SHong Zhang reallocs++; 24305f44c854SHong Zhang } 24315f44c854SHong Zhang 24325f44c854SHong Zhang /* copy data into free_space and free_space_lvl, then initialize lnk */ 243308401ef6SPierre Jolivet PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 24349566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 24355f44c854SHong Zhang 24365f44c854SHong Zhang /* add the k-th row into il and jl */ 24375f44c854SHong Zhang if (nzk > 1) { 24385f44c854SHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 24399371c9d4SSatish Balay jl[k] = jl[i]; 24409371c9d4SSatish Balay jl[i] = k; 24415f44c854SHong Zhang il[k] = ui[k] + 1; 24425f44c854SHong Zhang } 24435f44c854SHong Zhang uj_ptr[k] = current_space->array; 24445f44c854SHong Zhang uj_lvl_ptr[k] = current_space_lvl->array; 24455f44c854SHong Zhang 24465f44c854SHong Zhang current_space->array += nzk; 24475f44c854SHong Zhang current_space->local_used += nzk; 24485f44c854SHong Zhang current_space->local_remaining -= nzk; 24495f44c854SHong Zhang 24505f44c854SHong Zhang current_space_lvl->array += nzk; 24515f44c854SHong Zhang current_space_lvl->local_used += nzk; 24525f44c854SHong Zhang current_space_lvl->local_remaining -= nzk; 24535f44c854SHong Zhang 24545f44c854SHong Zhang ui[k + 1] = ui[k] + nzk; 24555f44c854SHong Zhang } 24565f44c854SHong Zhang 24579566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(perm, &rip)); 24589566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iperm, &riip)); 24599566063dSJacob Faibussowitsch PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 24609566063dSJacob Faibussowitsch PetscCall(PetscFree(ajtmp)); 24615f44c854SHong Zhang 24629263d837SHong Zhang /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 24639566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 24649566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 24659566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 24669566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 24675f44c854SHong Zhang 24685f44c854SHong Zhang } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 24695f44c854SHong Zhang 24705f44c854SHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 24715f44c854SHong Zhang b = (Mat_SeqSBAIJ *)(fact)->data; 24725f44c854SHong Zhang b->singlemalloc = PETSC_FALSE; 24732205254eSKarl Rupp 24749566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 24752205254eSKarl Rupp 24765f44c854SHong Zhang b->j = uj; 24775f44c854SHong Zhang b->i = ui; 24785f44c854SHong Zhang b->diag = udiag; 24797f53bb6cSHong Zhang b->free_diag = PETSC_TRUE; 2480f4259b30SLisandro Dalcin b->ilen = NULL; 2481f4259b30SLisandro Dalcin b->imax = NULL; 24825f44c854SHong Zhang b->row = perm; 24835f44c854SHong Zhang b->col = perm; 24849566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 24859566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 24865f44c854SHong Zhang b->icol = iperm; 24875f44c854SHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 24882205254eSKarl Rupp 24899566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 24902205254eSKarl Rupp 24915f44c854SHong Zhang b->maxnz = b->nz = ui[am]; 24925f44c854SHong Zhang b->free_a = PETSC_TRUE; 24935f44c854SHong Zhang b->free_ij = PETSC_TRUE; 24945f44c854SHong Zhang 2495f284f12aSHong Zhang fact->info.factor_mallocs = reallocs; 2496f284f12aSHong Zhang fact->info.fill_ratio_given = fill; 24975f44c854SHong Zhang if (ai[am] != 0) { 24986a69fef8SHong Zhang /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 249995b5ac22SHong Zhang fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 25005f44c854SHong Zhang } else { 2501f284f12aSHong Zhang fact->info.fill_ratio_needed = 0.0; 25025f44c854SHong Zhang } 25039263d837SHong Zhang #if defined(PETSC_USE_INFO) 25049263d837SHong Zhang if (ai[am] != 0) { 25059263d837SHong Zhang PetscReal af = fact->info.fill_ratio_needed; 25069566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 25079566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 25089566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 25099263d837SHong Zhang } else { 25109566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 25119263d837SHong Zhang } 25129263d837SHong Zhang #endif 251335233d99SShri Abhyankar fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 25145f44c854SHong Zhang PetscFunctionReturn(0); 25155f44c854SHong Zhang } 25165f44c854SHong Zhang 25179371c9d4SSatish Balay PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) { 25185f44c854SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 25195f44c854SHong Zhang Mat_SeqSBAIJ *b; 2520ace3abfcSBarry Smith PetscBool perm_identity, missing; 25215f44c854SHong Zhang PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 25225f44c854SHong Zhang const PetscInt *rip, *riip; 25235f44c854SHong Zhang PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 25240298fd71SBarry Smith PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 25255f44c854SHong Zhang PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 25265f44c854SHong Zhang PetscReal fill = info->fill, levels = info->levels; 25270298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 25280298fd71SBarry Smith PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 25295f44c854SHong Zhang PetscBT lnkbt; 25305f44c854SHong Zhang IS iperm; 25315f44c854SHong Zhang 25325f44c854SHong Zhang PetscFunctionBegin; 253308401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 25349566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &d)); 253528b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 25369566063dSJacob Faibussowitsch PetscCall(ISIdentity(perm, &perm_identity)); 25379566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 25385f44c854SHong Zhang 25399566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ui)); 25409566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &udiag)); 25415f44c854SHong Zhang ui[0] = 0; 25425f44c854SHong Zhang 25435f44c854SHong Zhang /* ICC(0) without matrix ordering: simply copies fill pattern */ 25445f44c854SHong Zhang if (!levels && perm_identity) { 25455f44c854SHong Zhang for (i = 0; i < am; i++) { 25465f44c854SHong Zhang ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i]; 25475f44c854SHong Zhang udiag[i] = ui[i]; 2548ed59401aSHong Zhang } 25499566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 255039e3d630SHong Zhang cols = uj; 2551ed59401aSHong Zhang for (i = 0; i < am; i++) { 2552ed59401aSHong Zhang aj = a->j + a->diag[i]; 255339e3d630SHong Zhang ncols = ui[i + 1] - ui[i]; 255439e3d630SHong Zhang for (j = 0; j < ncols; j++) *cols++ = *aj++; 2555ed59401aSHong Zhang } 255639e3d630SHong Zhang } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 25579566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iperm, &riip)); 25589566063dSJacob Faibussowitsch PetscCall(ISGetIndices(perm, &rip)); 2559b635d36bSHong Zhang 25607a48dd6fSHong Zhang /* initialization */ 25619566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ajtmp)); 25627a48dd6fSHong Zhang 25637a48dd6fSHong Zhang /* jl: linked list for storing indices of the pivot rows 25647a48dd6fSHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 25659566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 25667a48dd6fSHong Zhang for (i = 0; i < am; i++) { 25679371c9d4SSatish Balay jl[i] = am; 25689371c9d4SSatish Balay il[i] = 0; 25697a48dd6fSHong Zhang } 25707a48dd6fSHong Zhang 25717a48dd6fSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 25727a48dd6fSHong Zhang nlnk = am + 1; 25739566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 25747a48dd6fSHong Zhang 25757a48dd6fSHong Zhang /* initial FreeSpace size is fill*(ai[am]+1) */ 25769566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 25777a48dd6fSHong Zhang current_space = free_space; 25789566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl)); 25797a48dd6fSHong Zhang current_space_lvl = free_space_lvl; 25807a48dd6fSHong Zhang 25817a48dd6fSHong Zhang for (k = 0; k < am; k++) { /* for each active row k */ 25827a48dd6fSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 25837a48dd6fSHong Zhang nzk = 0; 2584622e2028SHong Zhang ncols = ai[rip[k] + 1] - ai[rip[k]]; 258528b400f6SJacob Faibussowitsch PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2586622e2028SHong Zhang ncols_upper = 0; 2587622e2028SHong Zhang for (j = 0; j < ncols; j++) { 2588b635d36bSHong Zhang i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2589b635d36bSHong Zhang if (riip[i] >= k) { /* only take upper triangular entry */ 25905a8e39fbSHong Zhang ajtmp[ncols_upper] = i; 2591622e2028SHong Zhang ncols_upper++; 2592622e2028SHong Zhang } 2593622e2028SHong Zhang } 25949566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 25957a48dd6fSHong Zhang nzk += nlnk; 25967a48dd6fSHong Zhang 25977a48dd6fSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 25987a48dd6fSHong Zhang prow = jl[k]; /* 1st pivot row */ 25997a48dd6fSHong Zhang 26007a48dd6fSHong Zhang while (prow < k) { 26017a48dd6fSHong Zhang nextprow = jl[prow]; 26027a48dd6fSHong Zhang 26037a48dd6fSHong Zhang /* merge prow into k-th row */ 26047a48dd6fSHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 26057a48dd6fSHong Zhang jmax = ui[prow + 1]; 26067a48dd6fSHong Zhang ncols = jmax - jmin; 2607ed59401aSHong Zhang i = jmin - ui[prow]; 2608ed59401aSHong Zhang cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 26095a8e39fbSHong Zhang uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 26105a8e39fbSHong Zhang j = *(uj - 1); 26119566063dSJacob Faibussowitsch PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 26127a48dd6fSHong Zhang nzk += nlnk; 26137a48dd6fSHong Zhang 26147a48dd6fSHong Zhang /* update il and jl for prow */ 26157a48dd6fSHong Zhang if (jmin < jmax) { 26167a48dd6fSHong Zhang il[prow] = jmin; 26179371c9d4SSatish Balay j = *cols; 26189371c9d4SSatish Balay jl[prow] = jl[j]; 26199371c9d4SSatish Balay jl[j] = prow; 26207a48dd6fSHong Zhang } 26217a48dd6fSHong Zhang prow = nextprow; 26227a48dd6fSHong Zhang } 26237a48dd6fSHong Zhang 26247a48dd6fSHong Zhang /* if free space is not available, make more free space */ 26257a48dd6fSHong Zhang if (current_space->local_remaining < nzk) { 26267a48dd6fSHong Zhang i = am - k + 1; /* num of unfactored rows */ 2627f91af8c7SBarry Smith i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 26289566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 26299566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 26307a48dd6fSHong Zhang reallocs++; 26317a48dd6fSHong Zhang } 26327a48dd6fSHong Zhang 26337a48dd6fSHong Zhang /* copy data into free_space and free_space_lvl, then initialize lnk */ 263428b400f6SJacob Faibussowitsch PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 26359566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 26367a48dd6fSHong Zhang 26377a48dd6fSHong Zhang /* add the k-th row into il and jl */ 263839e3d630SHong Zhang if (nzk > 1) { 26397a48dd6fSHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 26409371c9d4SSatish Balay jl[k] = jl[i]; 26419371c9d4SSatish Balay jl[i] = k; 26427a48dd6fSHong Zhang il[k] = ui[k] + 1; 26437a48dd6fSHong Zhang } 26447a48dd6fSHong Zhang uj_ptr[k] = current_space->array; 26457a48dd6fSHong Zhang uj_lvl_ptr[k] = current_space_lvl->array; 26467a48dd6fSHong Zhang 26477a48dd6fSHong Zhang current_space->array += nzk; 26487a48dd6fSHong Zhang current_space->local_used += nzk; 26497a48dd6fSHong Zhang current_space->local_remaining -= nzk; 26507a48dd6fSHong Zhang 26517a48dd6fSHong Zhang current_space_lvl->array += nzk; 26527a48dd6fSHong Zhang current_space_lvl->local_used += nzk; 26537a48dd6fSHong Zhang current_space_lvl->local_remaining -= nzk; 26547a48dd6fSHong Zhang 26557a48dd6fSHong Zhang ui[k + 1] = ui[k] + nzk; 26567a48dd6fSHong Zhang } 26577a48dd6fSHong Zhang 26586cf91177SBarry Smith #if defined(PETSC_USE_INFO) 26597a48dd6fSHong Zhang if (ai[am] != 0) { 266039e3d630SHong Zhang PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]); 26619566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 26629566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 26639566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 26647a48dd6fSHong Zhang } else { 26659566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 26667a48dd6fSHong Zhang } 266763ba0a88SBarry Smith #endif 26687a48dd6fSHong Zhang 26699566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(perm, &rip)); 26709566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iperm, &riip)); 26719566063dSJacob Faibussowitsch PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 26729566063dSJacob Faibussowitsch PetscCall(PetscFree(ajtmp)); 26737a48dd6fSHong Zhang 26747a48dd6fSHong Zhang /* destroy list of free space and other temporary array(s) */ 26759566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 26769566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 26779566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 26789566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 26797a48dd6fSHong Zhang 268039e3d630SHong Zhang } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 268139e3d630SHong Zhang 26827a48dd6fSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 26837a48dd6fSHong Zhang 2684f284f12aSHong Zhang b = (Mat_SeqSBAIJ *)fact->data; 26857a48dd6fSHong Zhang b->singlemalloc = PETSC_FALSE; 26862205254eSKarl Rupp 26879566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 26882205254eSKarl Rupp 26897a48dd6fSHong Zhang b->j = uj; 26907a48dd6fSHong Zhang b->i = ui; 26915f44c854SHong Zhang b->diag = udiag; 26927f53bb6cSHong Zhang b->free_diag = PETSC_TRUE; 2693f4259b30SLisandro Dalcin b->ilen = NULL; 2694f4259b30SLisandro Dalcin b->imax = NULL; 26957a48dd6fSHong Zhang b->row = perm; 2696b635d36bSHong Zhang b->col = perm; 26972205254eSKarl Rupp 26989566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 26999566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 27002205254eSKarl Rupp 2701b635d36bSHong Zhang b->icol = iperm; 27027a48dd6fSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 27039566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 27047a48dd6fSHong Zhang b->maxnz = b->nz = ui[am]; 2705e6b907acSBarry Smith b->free_a = PETSC_TRUE; 2706e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 27077a48dd6fSHong Zhang 2708f284f12aSHong Zhang fact->info.factor_mallocs = reallocs; 2709f284f12aSHong Zhang fact->info.fill_ratio_given = fill; 27107a48dd6fSHong Zhang if (ai[am] != 0) { 2711f284f12aSHong Zhang fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]); 27127a48dd6fSHong Zhang } else { 2713f284f12aSHong Zhang fact->info.fill_ratio_needed = 0.0; 27147a48dd6fSHong Zhang } 27150a3c351aSHong Zhang fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2716a6175056SHong Zhang PetscFunctionReturn(0); 2717a6175056SHong Zhang } 2718d5d45c9bSBarry Smith 27199371c9d4SSatish Balay PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) { 27200be760fbSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 27210be760fbSHong Zhang Mat_SeqSBAIJ *b; 27229186b105SHong Zhang PetscBool perm_identity, missing; 27230be760fbSHong Zhang PetscReal fill = info->fill; 27240be760fbSHong Zhang const PetscInt *rip, *riip; 27250be760fbSHong Zhang PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 27260be760fbSHong Zhang PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 27270be760fbSHong Zhang PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag; 27280298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 27290be760fbSHong Zhang PetscBT lnkbt; 27300be760fbSHong Zhang IS iperm; 27310be760fbSHong Zhang 27320be760fbSHong Zhang PetscFunctionBegin; 273308401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 27349566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 273528b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 27369186b105SHong Zhang 27370be760fbSHong Zhang /* check whether perm is the identity mapping */ 27389566063dSJacob Faibussowitsch PetscCall(ISIdentity(perm, &perm_identity)); 27399566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 27409566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iperm, &riip)); 27419566063dSJacob Faibussowitsch PetscCall(ISGetIndices(perm, &rip)); 27420be760fbSHong Zhang 27430be760fbSHong Zhang /* initialization */ 27449566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ui)); 27459566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &udiag)); 27460be760fbSHong Zhang ui[0] = 0; 27470be760fbSHong Zhang 27480be760fbSHong Zhang /* jl: linked list for storing indices of the pivot rows 27490be760fbSHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 27509566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 27510be760fbSHong Zhang for (i = 0; i < am; i++) { 27529371c9d4SSatish Balay jl[i] = am; 27539371c9d4SSatish Balay il[i] = 0; 27540be760fbSHong Zhang } 27550be760fbSHong Zhang 27560be760fbSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 27570be760fbSHong Zhang nlnk = am + 1; 27589566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 27590be760fbSHong Zhang 276095b5ac22SHong Zhang /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 27619566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 27620be760fbSHong Zhang current_space = free_space; 27630be760fbSHong Zhang 27640be760fbSHong Zhang for (k = 0; k < am; k++) { /* for each active row k */ 27650be760fbSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 27660be760fbSHong Zhang nzk = 0; 27670be760fbSHong Zhang ncols = ai[rip[k] + 1] - ai[rip[k]]; 276828b400f6SJacob Faibussowitsch PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 27690be760fbSHong Zhang ncols_upper = 0; 27700be760fbSHong Zhang for (j = 0; j < ncols; j++) { 27710be760fbSHong Zhang i = riip[*(aj + ai[rip[k]] + j)]; 27720be760fbSHong Zhang if (i >= k) { /* only take upper triangular entry */ 27730be760fbSHong Zhang cols[ncols_upper] = i; 27740be760fbSHong Zhang ncols_upper++; 27750be760fbSHong Zhang } 27760be760fbSHong Zhang } 27779566063dSJacob Faibussowitsch PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 27780be760fbSHong Zhang nzk += nlnk; 27790be760fbSHong Zhang 27800be760fbSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 27810be760fbSHong Zhang prow = jl[k]; /* 1st pivot row */ 27820be760fbSHong Zhang 27830be760fbSHong Zhang while (prow < k) { 27840be760fbSHong Zhang nextprow = jl[prow]; 27850be760fbSHong Zhang /* merge prow into k-th row */ 27860be760fbSHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 27870be760fbSHong Zhang jmax = ui[prow + 1]; 27880be760fbSHong Zhang ncols = jmax - jmin; 27890be760fbSHong Zhang uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 27909566063dSJacob Faibussowitsch PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 27910be760fbSHong Zhang nzk += nlnk; 27920be760fbSHong Zhang 27930be760fbSHong Zhang /* update il and jl for prow */ 27940be760fbSHong Zhang if (jmin < jmax) { 27950be760fbSHong Zhang il[prow] = jmin; 27962205254eSKarl Rupp j = *uj_ptr; 27972205254eSKarl Rupp jl[prow] = jl[j]; 27982205254eSKarl Rupp jl[j] = prow; 27990be760fbSHong Zhang } 28000be760fbSHong Zhang prow = nextprow; 28010be760fbSHong Zhang } 28020be760fbSHong Zhang 28030be760fbSHong Zhang /* if free space is not available, make more free space */ 28040be760fbSHong Zhang if (current_space->local_remaining < nzk) { 28050be760fbSHong Zhang i = am - k + 1; /* num of unfactored rows */ 2806f91af8c7SBarry Smith i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 28079566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 28080be760fbSHong Zhang reallocs++; 28090be760fbSHong Zhang } 28100be760fbSHong Zhang 28110be760fbSHong Zhang /* copy data into free space, then initialize lnk */ 28129566063dSJacob Faibussowitsch PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 28130be760fbSHong Zhang 28140be760fbSHong Zhang /* add the k-th row into il and jl */ 28157b056e98SHong Zhang if (nzk > 1) { 28160be760fbSHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 28179371c9d4SSatish Balay jl[k] = jl[i]; 28189371c9d4SSatish Balay jl[i] = k; 28190be760fbSHong Zhang il[k] = ui[k] + 1; 28200be760fbSHong Zhang } 28210be760fbSHong Zhang ui_ptr[k] = current_space->array; 28222205254eSKarl Rupp 28230be760fbSHong Zhang current_space->array += nzk; 28240be760fbSHong Zhang current_space->local_used += nzk; 28250be760fbSHong Zhang current_space->local_remaining -= nzk; 28260be760fbSHong Zhang 28270be760fbSHong Zhang ui[k + 1] = ui[k] + nzk; 28280be760fbSHong Zhang } 28290be760fbSHong Zhang 28309566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(perm, &rip)); 28319566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iperm, &riip)); 28329566063dSJacob Faibussowitsch PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 28330be760fbSHong Zhang 28349263d837SHong Zhang /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 28359566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 28369566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 28379566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 28380be760fbSHong Zhang 28390be760fbSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 28400be760fbSHong Zhang 2841f284f12aSHong Zhang b = (Mat_SeqSBAIJ *)fact->data; 28420be760fbSHong Zhang b->singlemalloc = PETSC_FALSE; 28430be760fbSHong Zhang b->free_a = PETSC_TRUE; 28440be760fbSHong Zhang b->free_ij = PETSC_TRUE; 28452205254eSKarl Rupp 28469566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 28472205254eSKarl Rupp 28480be760fbSHong Zhang b->j = uj; 28490be760fbSHong Zhang b->i = ui; 28500be760fbSHong Zhang b->diag = udiag; 28510be760fbSHong Zhang b->free_diag = PETSC_TRUE; 2852f4259b30SLisandro Dalcin b->ilen = NULL; 2853f4259b30SLisandro Dalcin b->imax = NULL; 28540be760fbSHong Zhang b->row = perm; 28550be760fbSHong Zhang b->col = perm; 285626fbe8dcSKarl Rupp 28579566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 28589566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 28592205254eSKarl Rupp 28600be760fbSHong Zhang b->icol = iperm; 28610be760fbSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 286226fbe8dcSKarl Rupp 28639566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 28642205254eSKarl Rupp 28650be760fbSHong Zhang b->maxnz = b->nz = ui[am]; 28660be760fbSHong Zhang 2867f284f12aSHong Zhang fact->info.factor_mallocs = reallocs; 2868f284f12aSHong Zhang fact->info.fill_ratio_given = fill; 28690be760fbSHong Zhang if (ai[am] != 0) { 28706a69fef8SHong Zhang /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 287195b5ac22SHong Zhang fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 28720be760fbSHong Zhang } else { 2873f284f12aSHong Zhang fact->info.fill_ratio_needed = 0.0; 28740be760fbSHong Zhang } 28759263d837SHong Zhang #if defined(PETSC_USE_INFO) 28769263d837SHong Zhang if (ai[am] != 0) { 28779263d837SHong Zhang PetscReal af = fact->info.fill_ratio_needed; 28789566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 28799566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 28809566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 28819263d837SHong Zhang } else { 28829566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 28839263d837SHong Zhang } 28849263d837SHong Zhang #endif 288535233d99SShri Abhyankar fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 28860be760fbSHong Zhang PetscFunctionReturn(0); 28870be760fbSHong Zhang } 28880be760fbSHong Zhang 28899371c9d4SSatish Balay PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) { 2890f76d2b81SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 289110c27e3dSHong Zhang Mat_SeqSBAIJ *b; 28929186b105SHong Zhang PetscBool perm_identity, missing; 289310c27e3dSHong Zhang PetscReal fill = info->fill; 28945d0c19d7SBarry Smith const PetscInt *rip, *riip; 28955d0c19d7SBarry Smith PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 289610c27e3dSHong Zhang PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 28972ed133dbSHong Zhang PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr; 28980298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 289910c27e3dSHong Zhang PetscBT lnkbt; 29002ed133dbSHong Zhang IS iperm; 2901f76d2b81SHong Zhang 2902f76d2b81SHong Zhang PetscFunctionBegin; 290308401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 29049566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 290528b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 29069186b105SHong Zhang 29072ed133dbSHong Zhang /* check whether perm is the identity mapping */ 29089566063dSJacob Faibussowitsch PetscCall(ISIdentity(perm, &perm_identity)); 29099566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 29109566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iperm, &riip)); 29119566063dSJacob Faibussowitsch PetscCall(ISGetIndices(perm, &rip)); 291210c27e3dSHong Zhang 291310c27e3dSHong Zhang /* initialization */ 29149566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ui)); 291510c27e3dSHong Zhang ui[0] = 0; 291610c27e3dSHong Zhang 291710c27e3dSHong Zhang /* jl: linked list for storing indices of the pivot rows 29181393bc97SHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 29199566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 29201393bc97SHong Zhang for (i = 0; i < am; i++) { 29219371c9d4SSatish Balay jl[i] = am; 29229371c9d4SSatish Balay il[i] = 0; 2923f76d2b81SHong Zhang } 292410c27e3dSHong Zhang 292510c27e3dSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 29261393bc97SHong Zhang nlnk = am + 1; 29279566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 292810c27e3dSHong Zhang 29291393bc97SHong Zhang /* initial FreeSpace size is fill*(ai[am]+1) */ 29309566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 293110c27e3dSHong Zhang current_space = free_space; 293210c27e3dSHong Zhang 29331393bc97SHong Zhang for (k = 0; k < am; k++) { /* for each active row k */ 293410c27e3dSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 293510c27e3dSHong Zhang nzk = 0; 29362ed133dbSHong Zhang ncols = ai[rip[k] + 1] - ai[rip[k]]; 293794bad497SJacob Faibussowitsch PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 29382ed133dbSHong Zhang ncols_upper = 0; 29392ed133dbSHong Zhang for (j = 0; j < ncols; j++) { 29409bfd6278SHong Zhang i = riip[*(aj + ai[rip[k]] + j)]; 29412ed133dbSHong Zhang if (i >= k) { /* only take upper triangular entry */ 29422ed133dbSHong Zhang cols[ncols_upper] = i; 29432ed133dbSHong Zhang ncols_upper++; 29442ed133dbSHong Zhang } 29452ed133dbSHong Zhang } 29469566063dSJacob Faibussowitsch PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 294710c27e3dSHong Zhang nzk += nlnk; 294810c27e3dSHong Zhang 294910c27e3dSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 295010c27e3dSHong Zhang prow = jl[k]; /* 1st pivot row */ 295110c27e3dSHong Zhang 295210c27e3dSHong Zhang while (prow < k) { 295310c27e3dSHong Zhang nextprow = jl[prow]; 295410c27e3dSHong Zhang /* merge prow into k-th row */ 29551393bc97SHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 295610c27e3dSHong Zhang jmax = ui[prow + 1]; 295710c27e3dSHong Zhang ncols = jmax - jmin; 29581393bc97SHong Zhang uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 29599566063dSJacob Faibussowitsch PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 296010c27e3dSHong Zhang nzk += nlnk; 296110c27e3dSHong Zhang 296210c27e3dSHong Zhang /* update il and jl for prow */ 296310c27e3dSHong Zhang if (jmin < jmax) { 296410c27e3dSHong Zhang il[prow] = jmin; 29659371c9d4SSatish Balay j = *uj_ptr; 29669371c9d4SSatish Balay jl[prow] = jl[j]; 29679371c9d4SSatish Balay jl[j] = prow; 296810c27e3dSHong Zhang } 296910c27e3dSHong Zhang prow = nextprow; 297010c27e3dSHong Zhang } 297110c27e3dSHong Zhang 297210c27e3dSHong Zhang /* if free space is not available, make more free space */ 297310c27e3dSHong Zhang if (current_space->local_remaining < nzk) { 29741393bc97SHong Zhang i = am - k + 1; /* num of unfactored rows */ 297510c27e3dSHong Zhang i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 29769566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 297710c27e3dSHong Zhang reallocs++; 297810c27e3dSHong Zhang } 297910c27e3dSHong Zhang 298010c27e3dSHong Zhang /* copy data into free space, then initialize lnk */ 29819566063dSJacob Faibussowitsch PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 298210c27e3dSHong Zhang 298310c27e3dSHong Zhang /* add the k-th row into il and jl */ 298410c27e3dSHong Zhang if (nzk - 1 > 0) { 29851393bc97SHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 29869371c9d4SSatish Balay jl[k] = jl[i]; 29879371c9d4SSatish Balay jl[i] = k; 298810c27e3dSHong Zhang il[k] = ui[k] + 1; 298910c27e3dSHong Zhang } 29902ed133dbSHong Zhang ui_ptr[k] = current_space->array; 29912205254eSKarl Rupp 299210c27e3dSHong Zhang current_space->array += nzk; 299310c27e3dSHong Zhang current_space->local_used += nzk; 299410c27e3dSHong Zhang current_space->local_remaining -= nzk; 299510c27e3dSHong Zhang 299610c27e3dSHong Zhang ui[k + 1] = ui[k] + nzk; 299710c27e3dSHong Zhang } 299810c27e3dSHong Zhang 29996cf91177SBarry Smith #if defined(PETSC_USE_INFO) 30001393bc97SHong Zhang if (ai[am] != 0) { 30011393bc97SHong Zhang PetscReal af = (PetscReal)(ui[am]) / ((PetscReal)ai[am]); 30029566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 30039566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 30049566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 300510c27e3dSHong Zhang } else { 30069566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 300710c27e3dSHong Zhang } 300863ba0a88SBarry Smith #endif 300910c27e3dSHong Zhang 30109566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(perm, &rip)); 30119566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iperm, &riip)); 30129566063dSJacob Faibussowitsch PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 301310c27e3dSHong Zhang 301410c27e3dSHong Zhang /* destroy list of free space and other temporary array(s) */ 30159566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 30169566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 30179566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 301810c27e3dSHong Zhang 301910c27e3dSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 302010c27e3dSHong Zhang 3021f284f12aSHong Zhang b = (Mat_SeqSBAIJ *)fact->data; 302210c27e3dSHong Zhang b->singlemalloc = PETSC_FALSE; 3023e6b907acSBarry Smith b->free_a = PETSC_TRUE; 3024e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 30252205254eSKarl Rupp 30269566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 30272205254eSKarl Rupp 302810c27e3dSHong Zhang b->j = uj; 302910c27e3dSHong Zhang b->i = ui; 3030f4259b30SLisandro Dalcin b->diag = NULL; 3031f4259b30SLisandro Dalcin b->ilen = NULL; 3032f4259b30SLisandro Dalcin b->imax = NULL; 303310c27e3dSHong Zhang b->row = perm; 30349bfd6278SHong Zhang b->col = perm; 30352205254eSKarl Rupp 30369566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 30379566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 30382205254eSKarl Rupp 30399bfd6278SHong Zhang b->icol = iperm; 304010c27e3dSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 30412205254eSKarl Rupp 30429566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 30431393bc97SHong Zhang b->maxnz = b->nz = ui[am]; 304410c27e3dSHong Zhang 3045f284f12aSHong Zhang fact->info.factor_mallocs = reallocs; 3046f284f12aSHong Zhang fact->info.fill_ratio_given = fill; 30471393bc97SHong Zhang if (ai[am] != 0) { 3048f284f12aSHong Zhang fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]); 304910c27e3dSHong Zhang } else { 3050f284f12aSHong Zhang fact->info.fill_ratio_needed = 0.0; 305110c27e3dSHong Zhang } 30520a3c351aSHong Zhang fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3053f76d2b81SHong Zhang PetscFunctionReturn(0); 3054f76d2b81SHong Zhang } 3055599ef60dSHong Zhang 30569371c9d4SSatish Balay PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx) { 3057813a1f2bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3058813a1f2bSShri Abhyankar PetscInt n = A->rmap->n; 3059813a1f2bSShri Abhyankar const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi; 3060813a1f2bSShri Abhyankar PetscScalar *x, sum; 3061813a1f2bSShri Abhyankar const PetscScalar *b; 3062813a1f2bSShri Abhyankar const MatScalar *aa = a->a, *v; 3063813a1f2bSShri Abhyankar PetscInt i, nz; 3064813a1f2bSShri Abhyankar 3065813a1f2bSShri Abhyankar PetscFunctionBegin; 3066813a1f2bSShri Abhyankar if (!n) PetscFunctionReturn(0); 3067813a1f2bSShri Abhyankar 30689566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 30699566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 3070813a1f2bSShri Abhyankar 3071813a1f2bSShri Abhyankar /* forward solve the lower triangular */ 3072813a1f2bSShri Abhyankar x[0] = b[0]; 3073813a1f2bSShri Abhyankar v = aa; 3074813a1f2bSShri Abhyankar vi = aj; 3075813a1f2bSShri Abhyankar for (i = 1; i < n; i++) { 3076813a1f2bSShri Abhyankar nz = ai[i + 1] - ai[i]; 3077813a1f2bSShri Abhyankar sum = b[i]; 3078813a1f2bSShri Abhyankar PetscSparseDenseMinusDot(sum, x, v, vi, nz); 3079813a1f2bSShri Abhyankar v += nz; 3080813a1f2bSShri Abhyankar vi += nz; 3081813a1f2bSShri Abhyankar x[i] = sum; 3082813a1f2bSShri Abhyankar } 3083813a1f2bSShri Abhyankar 3084813a1f2bSShri Abhyankar /* backward solve the upper triangular */ 308562fbd6afSShri Abhyankar for (i = n - 1; i >= 0; i--) { 30863c0119dfSShri Abhyankar v = aa + adiag[i + 1] + 1; 30873c0119dfSShri Abhyankar vi = aj + adiag[i + 1] + 1; 3088813a1f2bSShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 3089813a1f2bSShri Abhyankar sum = x[i]; 3090813a1f2bSShri Abhyankar PetscSparseDenseMinusDot(sum, x, v, vi, nz); 30913c0119dfSShri Abhyankar x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3092813a1f2bSShri Abhyankar } 3093813a1f2bSShri Abhyankar 30949566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 30959566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 30969566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 3097813a1f2bSShri Abhyankar PetscFunctionReturn(0); 3098813a1f2bSShri Abhyankar } 3099813a1f2bSShri Abhyankar 31009371c9d4SSatish Balay PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx) { 3101f268cba8SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3102f268cba8SShri Abhyankar IS iscol = a->col, isrow = a->row; 3103f268cba8SShri Abhyankar PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz; 3104f268cba8SShri Abhyankar const PetscInt *rout, *cout, *r, *c; 3105f268cba8SShri Abhyankar PetscScalar *x, *tmp, sum; 3106f268cba8SShri Abhyankar const PetscScalar *b; 3107f268cba8SShri Abhyankar const MatScalar *aa = a->a, *v; 3108f268cba8SShri Abhyankar 3109f268cba8SShri Abhyankar PetscFunctionBegin; 3110f268cba8SShri Abhyankar if (!n) PetscFunctionReturn(0); 3111f268cba8SShri Abhyankar 31129566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 31139566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 3114f268cba8SShri Abhyankar tmp = a->solve_work; 3115f268cba8SShri Abhyankar 31169371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 31179371c9d4SSatish Balay r = rout; 31189371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 31199371c9d4SSatish Balay c = cout; 3120f268cba8SShri Abhyankar 3121f268cba8SShri Abhyankar /* forward solve the lower triangular */ 3122f268cba8SShri Abhyankar tmp[0] = b[r[0]]; 3123f268cba8SShri Abhyankar v = aa; 3124f268cba8SShri Abhyankar vi = aj; 3125f268cba8SShri Abhyankar for (i = 1; i < n; i++) { 3126f268cba8SShri Abhyankar nz = ai[i + 1] - ai[i]; 3127f268cba8SShri Abhyankar sum = b[r[i]]; 3128f268cba8SShri Abhyankar PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3129f268cba8SShri Abhyankar tmp[i] = sum; 31309371c9d4SSatish Balay v += nz; 31319371c9d4SSatish Balay vi += nz; 3132f268cba8SShri Abhyankar } 3133f268cba8SShri Abhyankar 3134f268cba8SShri Abhyankar /* backward solve the upper triangular */ 3135f268cba8SShri Abhyankar for (i = n - 1; i >= 0; i--) { 31363c0119dfSShri Abhyankar v = aa + adiag[i + 1] + 1; 31373c0119dfSShri Abhyankar vi = aj + adiag[i + 1] + 1; 3138f268cba8SShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 3139f268cba8SShri Abhyankar sum = tmp[i]; 3140f268cba8SShri Abhyankar PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3141f268cba8SShri Abhyankar x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 3142f268cba8SShri Abhyankar } 3143f268cba8SShri Abhyankar 31449566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 31459566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 31469566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 31479566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 31489566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 3149f268cba8SShri Abhyankar PetscFunctionReturn(0); 3150f268cba8SShri Abhyankar } 3151f268cba8SShri Abhyankar 3152fe97e370SBarry Smith /* 31536aad120cSJose E. Roman This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3154fe97e370SBarry Smith */ 31559371c9d4SSatish Balay PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact) { 31561d098868SHong Zhang Mat B = *fact; 3157599ef60dSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 3158599ef60dSHong Zhang IS isicol; 31591d098868SHong Zhang const PetscInt *r, *ic; 31601d098868SHong Zhang PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag; 3161dc3a2fd3SHong Zhang PetscInt *bi, *bj, *bdiag, *bdiag_rev; 3162f61a948aSLisandro Dalcin PetscInt row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au; 31631d098868SHong Zhang PetscInt nlnk, *lnk; 31641d098868SHong Zhang PetscBT lnkbt; 3165ace3abfcSBarry Smith PetscBool row_identity, icol_identity; 31668aee2decSHong Zhang MatScalar *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp; 31671d098868SHong Zhang const PetscInt *ics; 31681d098868SHong Zhang PetscInt j, nz, *pj, *bjtmp, k, ncut, *jtmp; 3169a2ea699eSBarry Smith PetscReal dt = info->dt, shift = info->shiftamount; 3170f61a948aSLisandro Dalcin PetscInt dtcount = (PetscInt)info->dtcount, nnz_max; 3171ace3abfcSBarry Smith PetscBool missing; 3172599ef60dSHong Zhang 3173599ef60dSHong Zhang PetscFunctionBegin; 3174f61a948aSLisandro Dalcin if (dt == PETSC_DEFAULT) dt = 0.005; 3175f61a948aSLisandro Dalcin if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax); 3176f61a948aSLisandro Dalcin 31771d098868SHong Zhang /* ------- symbolic factorization, can be reused ---------*/ 31789566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 317928b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 31801d098868SHong Zhang adiag = a->diag; 3181599ef60dSHong Zhang 31829566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 3183599ef60dSHong Zhang 3184599ef60dSHong Zhang /* bdiag is location of diagonal in factor */ 31859566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); /* becomes b->diag */ 31869566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */ 3187599ef60dSHong Zhang 31881d098868SHong Zhang /* allocate row pointers bi */ 31899566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * n + 2, &bi)); 31901d098868SHong Zhang 31911d098868SHong Zhang /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3192393d3a68SHong Zhang if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */ 31931d098868SHong Zhang nnz_max = ai[n] + 2 * n * dtcount + 2; 31948fc3a347SHong Zhang 31959566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz_max + 1, &bj)); 31969566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz_max + 1, &ba)); 3197599ef60dSHong Zhang 3198599ef60dSHong Zhang /* put together the new matrix */ 31999566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 3200f284f12aSHong Zhang b = (Mat_SeqAIJ *)B->data; 32012205254eSKarl Rupp 3202599ef60dSHong Zhang b->free_a = PETSC_TRUE; 3203599ef60dSHong Zhang b->free_ij = PETSC_TRUE; 3204599ef60dSHong Zhang b->singlemalloc = PETSC_FALSE; 32052205254eSKarl Rupp 3206599ef60dSHong Zhang b->a = ba; 3207599ef60dSHong Zhang b->j = bj; 3208599ef60dSHong Zhang b->i = bi; 3209599ef60dSHong Zhang b->diag = bdiag; 3210f4259b30SLisandro Dalcin b->ilen = NULL; 3211f4259b30SLisandro Dalcin b->imax = NULL; 3212599ef60dSHong Zhang b->row = isrow; 3213599ef60dSHong Zhang b->col = iscol; 32149566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 32159566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 3216599ef60dSHong Zhang b->icol = isicol; 3217599ef60dSHong Zhang 32189566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 32191d098868SHong Zhang b->maxnz = nnz_max; 3220599ef60dSHong Zhang 3221d5f3da31SBarry Smith B->factortype = MAT_FACTOR_ILUDT; 3222f284f12aSHong Zhang B->info.factor_mallocs = 0; 3223f284f12aSHong Zhang B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]); 32241d098868SHong Zhang /* ------- end of symbolic factorization ---------*/ 3225599ef60dSHong Zhang 32269566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 32279566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 3228599ef60dSHong Zhang ics = ic; 3229599ef60dSHong Zhang 3230599ef60dSHong Zhang /* linked list for storing column indices of the active row */ 3231599ef60dSHong Zhang nlnk = n + 1; 32329566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 32331d098868SHong Zhang 32341d098868SHong Zhang /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 32359566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n, &im, n, &jtmp)); 32361d098868SHong Zhang /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 32379566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp)); 32389566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(rtmp, n)); 3239599ef60dSHong Zhang 3240599ef60dSHong Zhang bi[0] = 0; 32418fc3a347SHong Zhang bdiag[0] = nnz_max - 1; /* location of diag[0] in factor B */ 3242dc3a2fd3SHong Zhang bdiag_rev[n] = bdiag[0]; 32438fc3a347SHong Zhang bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */ 3244599ef60dSHong Zhang for (i = 0; i < n; i++) { 3245599ef60dSHong Zhang /* copy initial fill into linked list */ 32468369b28dSHong Zhang nzi = ai[r[i] + 1] - ai[r[i]]; 324794bad497SJacob Faibussowitsch PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 32488369b28dSHong Zhang nzi_al = adiag[r[i]] - ai[r[i]]; 32498369b28dSHong Zhang nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1; 3250599ef60dSHong Zhang ajtmp = aj + ai[r[i]]; 32519566063dSJacob Faibussowitsch PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 3252599ef60dSHong Zhang 3253599ef60dSHong Zhang /* load in initial (unfactored row) */ 32541d098868SHong Zhang aatmp = a->a + ai[r[i]]; 3255ad540459SPierre Jolivet for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++; 3256599ef60dSHong Zhang 3257599ef60dSHong Zhang /* add pivot rows into linked list */ 3258599ef60dSHong Zhang row = lnk[n]; 3259599ef60dSHong Zhang while (row < i) { 32601d098868SHong Zhang nzi_bl = bi[row + 1] - bi[row] + 1; 32611d098868SHong Zhang bjtmp = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */ 32629566063dSJacob Faibussowitsch PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im)); 3263599ef60dSHong Zhang nzi += nlnk; 3264599ef60dSHong Zhang row = lnk[row]; 3265599ef60dSHong Zhang } 3266599ef60dSHong Zhang 32678369b28dSHong Zhang /* copy data from lnk into jtmp, then initialize lnk */ 32689566063dSJacob Faibussowitsch PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt)); 3269599ef60dSHong Zhang 3270599ef60dSHong Zhang /* numerical factorization */ 32718369b28dSHong Zhang bjtmp = jtmp; 3272599ef60dSHong Zhang row = *bjtmp++; /* 1st pivot row */ 3273599ef60dSHong Zhang while (row < i) { 3274599ef60dSHong Zhang pc = rtmp + row; 32753c2a7987SHong Zhang pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 32763c2a7987SHong Zhang multiplier = (*pc) * (*pv); 3277599ef60dSHong Zhang *pc = multiplier; 32783c2a7987SHong Zhang if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 32791d098868SHong Zhang pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 32801d098868SHong Zhang pv = ba + bdiag[row + 1] + 1; 32811d098868SHong Zhang nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 32821d098868SHong Zhang for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 32839566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 3284599ef60dSHong Zhang } 3285599ef60dSHong Zhang row = *bjtmp++; 3286599ef60dSHong Zhang } 3287599ef60dSHong Zhang 32888369b28dSHong Zhang /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 32898aee2decSHong Zhang diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 32909371c9d4SSatish Balay nzi_bl = 0; 32919371c9d4SSatish Balay j = 0; 32928369b28dSHong Zhang while (jtmp[j] < i) { /* Note: jtmp is sorted */ 32939371c9d4SSatish Balay vtmp[j] = rtmp[jtmp[j]]; 32949371c9d4SSatish Balay rtmp[jtmp[j]] = 0.0; 32959371c9d4SSatish Balay nzi_bl++; 32969371c9d4SSatish Balay j++; 32978369b28dSHong Zhang } 32988369b28dSHong Zhang nzi_bu = nzi - nzi_bl - 1; 32998369b28dSHong Zhang while (j < nzi) { 33009371c9d4SSatish Balay vtmp[j] = rtmp[jtmp[j]]; 33019371c9d4SSatish Balay rtmp[jtmp[j]] = 0.0; 33028369b28dSHong Zhang j++; 33038369b28dSHong Zhang } 33048369b28dSHong Zhang 3305599ef60dSHong Zhang bjtmp = bj + bi[i]; 3306599ef60dSHong Zhang batmp = ba + bi[i]; 33078369b28dSHong Zhang /* apply level dropping rule to L part */ 33088369b28dSHong Zhang ncut = nzi_al + dtcount; 33098369b28dSHong Zhang if (ncut < nzi_bl) { 33109566063dSJacob Faibussowitsch PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp)); 33119566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp)); 3312599ef60dSHong Zhang } else { 33138369b28dSHong Zhang ncut = nzi_bl; 33148369b28dSHong Zhang } 33158369b28dSHong Zhang for (j = 0; j < ncut; j++) { 33168369b28dSHong Zhang bjtmp[j] = jtmp[j]; 33178369b28dSHong Zhang batmp[j] = vtmp[j]; 33188369b28dSHong Zhang } 33191d098868SHong Zhang bi[i + 1] = bi[i] + ncut; 33208369b28dSHong Zhang nzi = ncut + 1; 33218369b28dSHong Zhang 33228369b28dSHong Zhang /* apply level dropping rule to U part */ 33238369b28dSHong Zhang ncut = nzi_au + dtcount; 33248369b28dSHong Zhang if (ncut < nzi_bu) { 33259566063dSJacob Faibussowitsch PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1)); 33269566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1)); 33278369b28dSHong Zhang } else { 33288369b28dSHong Zhang ncut = nzi_bu; 33298369b28dSHong Zhang } 33308369b28dSHong Zhang nzi += ncut; 33311d098868SHong Zhang 33321d098868SHong Zhang /* mark bdiagonal */ 33331d098868SHong Zhang bdiag[i + 1] = bdiag[i] - (ncut + 1); 3334dc3a2fd3SHong Zhang bdiag_rev[n - i - 1] = bdiag[i + 1]; 33358fc3a347SHong Zhang bi[2 * n - i] = bi[2 * n - i + 1] - (ncut + 1); 33361d098868SHong Zhang bjtmp = bj + bdiag[i]; 33371d098868SHong Zhang batmp = ba + bdiag[i]; 33381d098868SHong Zhang *bjtmp = i; 33398aee2decSHong Zhang *batmp = diag_tmp; /* rtmp[i]; */ 3340ad540459SPierre Jolivet if (*batmp == 0.0) *batmp = dt + shift; 3341a5b23f4aSJose E. Roman *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */ 33421d098868SHong Zhang 33431d098868SHong Zhang bjtmp = bj + bdiag[i + 1] + 1; 33441d098868SHong Zhang batmp = ba + bdiag[i + 1] + 1; 33458369b28dSHong Zhang for (k = 0; k < ncut; k++) { 33461d098868SHong Zhang bjtmp[k] = jtmp[nzi_bl + 1 + k]; 33471d098868SHong Zhang batmp[k] = vtmp[nzi_bl + 1 + k]; 3348599ef60dSHong Zhang } 3349599ef60dSHong Zhang 3350599ef60dSHong Zhang im[i] = nzi; /* used by PetscLLAddSortedLU() */ 33518369b28dSHong Zhang } /* for (i=0; i<n; i++) */ 335263a3b9bcSJacob Faibussowitsch PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]); 3353599ef60dSHong Zhang 33549566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 33559566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 3356599ef60dSHong Zhang 33579566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 33589566063dSJacob Faibussowitsch PetscCall(PetscFree2(im, jtmp)); 33599566063dSJacob Faibussowitsch PetscCall(PetscFree2(rtmp, vtmp)); 33609566063dSJacob Faibussowitsch PetscCall(PetscFree(bdiag_rev)); 3361599ef60dSHong Zhang 33629566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(B->cmap->n)); 33631d098868SHong Zhang b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3364599ef60dSHong Zhang 33659566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 33669566063dSJacob Faibussowitsch PetscCall(ISIdentity(isicol, &icol_identity)); 3367599ef60dSHong Zhang if (row_identity && icol_identity) { 336835233d99SShri Abhyankar B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3369599ef60dSHong Zhang } else { 337035233d99SShri Abhyankar B->ops->solve = MatSolve_SeqAIJ; 3371599ef60dSHong Zhang } 3372599ef60dSHong Zhang 3373f4259b30SLisandro Dalcin B->ops->solveadd = NULL; 3374f4259b30SLisandro Dalcin B->ops->solvetranspose = NULL; 3375f4259b30SLisandro Dalcin B->ops->solvetransposeadd = NULL; 3376f4259b30SLisandro Dalcin B->ops->matsolve = NULL; 3377599ef60dSHong Zhang B->assembled = PETSC_TRUE; 3378599ef60dSHong Zhang B->preallocated = PETSC_TRUE; 3379599ef60dSHong Zhang PetscFunctionReturn(0); 3380599ef60dSHong Zhang } 3381599ef60dSHong Zhang 33823c2a7987SHong Zhang /* a wraper of MatILUDTFactor_SeqAIJ() */ 3383fe97e370SBarry Smith /* 33846aad120cSJose E. Roman This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3385fe97e370SBarry Smith */ 3386fe97e370SBarry Smith 33879371c9d4SSatish Balay PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) { 33883c2a7987SHong Zhang PetscFunctionBegin; 33899566063dSJacob Faibussowitsch PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact)); 33903c2a7987SHong Zhang PetscFunctionReturn(0); 33913c2a7987SHong Zhang } 33923c2a7987SHong Zhang 33933c2a7987SHong Zhang /* 33943c2a7987SHong Zhang same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 33953c2a7987SHong Zhang - intend to replace existing MatLUFactorNumeric_SeqAIJ() 33963c2a7987SHong Zhang */ 3397fe97e370SBarry Smith /* 33986aad120cSJose E. Roman This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3399fe97e370SBarry Smith */ 3400fe97e370SBarry Smith 34019371c9d4SSatish Balay PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info) { 34023c2a7987SHong Zhang Mat C = fact; 34033c2a7987SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 34043c2a7987SHong Zhang IS isrow = b->row, isicol = b->icol; 34053c2a7987SHong Zhang const PetscInt *r, *ic, *ics; 3406b78a477dSHong Zhang PetscInt i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 3407b78a477dSHong Zhang PetscInt *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj; 34083c2a7987SHong Zhang MatScalar *rtmp, *pc, multiplier, *v, *pv, *aa = a->a; 3409182b8fbaSHong Zhang PetscReal dt = info->dt, shift = info->shiftamount; 3410ace3abfcSBarry Smith PetscBool row_identity, col_identity; 34113c2a7987SHong Zhang 34123c2a7987SHong Zhang PetscFunctionBegin; 34139566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 34149566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 34159566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp)); 34163c2a7987SHong Zhang ics = ic; 34173c2a7987SHong Zhang 34183c2a7987SHong Zhang for (i = 0; i < n; i++) { 3419b78a477dSHong Zhang /* initialize rtmp array */ 3420b78a477dSHong Zhang nzl = bi[i + 1] - bi[i]; /* num of nozeros in L(i,:) */ 34213c2a7987SHong Zhang bjtmp = bj + bi[i]; 3422b78a477dSHong Zhang for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0; 3423b78a477dSHong Zhang rtmp[i] = 0.0; 3424b78a477dSHong Zhang nzu = bdiag[i] - bdiag[i + 1]; /* num of nozeros in U(i,:) */ 3425b78a477dSHong Zhang bjtmp = bj + bdiag[i + 1] + 1; 3426b78a477dSHong Zhang for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0; 34273c2a7987SHong Zhang 3428b78a477dSHong Zhang /* load in initial unfactored row of A */ 34293c2a7987SHong Zhang nz = ai[r[i] + 1] - ai[r[i]]; 34303c2a7987SHong Zhang ajtmp = aj + ai[r[i]]; 34313c2a7987SHong Zhang v = aa + ai[r[i]]; 3432ad540459SPierre Jolivet for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j]; 34333c2a7987SHong Zhang 3434b78a477dSHong Zhang /* numerical factorization */ 3435b78a477dSHong Zhang bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3436b78a477dSHong Zhang nzl = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */ 3437b78a477dSHong Zhang k = 0; 3438b78a477dSHong Zhang while (k < nzl) { 34393c2a7987SHong Zhang row = *bjtmp++; 34403c2a7987SHong Zhang pc = rtmp + row; 3441b78a477dSHong Zhang pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3442b78a477dSHong Zhang multiplier = (*pc) * (*pv); 34433c2a7987SHong Zhang *pc = multiplier; 3444b78a477dSHong Zhang if (PetscAbsScalar(multiplier) > dt) { 3445b78a477dSHong Zhang pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 3446b78a477dSHong Zhang pv = b->a + bdiag[row + 1] + 1; 3447b78a477dSHong Zhang nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3448b78a477dSHong Zhang for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 34499566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 34503c2a7987SHong Zhang } 3451b78a477dSHong Zhang k++; 34523c2a7987SHong Zhang } 34533c2a7987SHong Zhang 3454b78a477dSHong Zhang /* finished row so stick it into b->a */ 3455b78a477dSHong Zhang /* L-part */ 3456b78a477dSHong Zhang pv = b->a + bi[i]; 3457b78a477dSHong Zhang pj = bj + bi[i]; 3458b78a477dSHong Zhang nzl = bi[i + 1] - bi[i]; 3459ad540459SPierre Jolivet for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]]; 3460b78a477dSHong Zhang 3461a5b23f4aSJose E. Roman /* diagonal: invert diagonal entries for simpler triangular solves */ 3462b78a477dSHong Zhang if (rtmp[i] == 0.0) rtmp[i] = dt + shift; 3463b78a477dSHong Zhang b->a[bdiag[i]] = 1.0 / rtmp[i]; 3464b78a477dSHong Zhang 3465b78a477dSHong Zhang /* U-part */ 3466b78a477dSHong Zhang pv = b->a + bdiag[i + 1] + 1; 3467b78a477dSHong Zhang pj = bj + bdiag[i + 1] + 1; 3468b78a477dSHong Zhang nzu = bdiag[i] - bdiag[i + 1] - 1; 3469ad540459SPierre Jolivet for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]]; 3470b78a477dSHong Zhang } 3471b78a477dSHong Zhang 34729566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 34739566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 34749566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 3475b78a477dSHong Zhang 34769566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 34779566063dSJacob Faibussowitsch PetscCall(ISIdentity(isicol, &col_identity)); 34783c2a7987SHong Zhang if (row_identity && col_identity) { 347935233d99SShri Abhyankar C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 34803c2a7987SHong Zhang } else { 348135233d99SShri Abhyankar C->ops->solve = MatSolve_SeqAIJ; 34823c2a7987SHong Zhang } 3483f4259b30SLisandro Dalcin C->ops->solveadd = NULL; 3484f4259b30SLisandro Dalcin C->ops->solvetranspose = NULL; 3485f4259b30SLisandro Dalcin C->ops->solvetransposeadd = NULL; 3486f4259b30SLisandro Dalcin C->ops->matsolve = NULL; 34873c2a7987SHong Zhang C->assembled = PETSC_TRUE; 34883c2a7987SHong Zhang C->preallocated = PETSC_TRUE; 34892205254eSKarl Rupp 34909566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->cmap->n)); 34913c2a7987SHong Zhang PetscFunctionReturn(0); 34923c2a7987SHong Zhang } 3493