1c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> 2c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h> 3c6db04a5SJed Brown #include <petscbt.h> 4c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 53b2fbd54SBarry Smith 68fa24674SBarry Smith /* 78fa24674SBarry Smith Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 8312e372aSBarry Smith 9312e372aSBarry Smith This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 108fa24674SBarry Smith */ 11ff6a9541SJacob Faibussowitsch #if 0 12d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol) 13d71ae5a4SJacob Faibussowitsch { 148fa24674SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->data; 15889a8dedSBarry Smith PetscInt i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order; 168fa24674SBarry Smith const PetscInt *ai = a->i, *aj = a->j; 178fa24674SBarry Smith const PetscScalar *aa = a->a; 18ace3abfcSBarry Smith PetscBool *done; 19889a8dedSBarry Smith PetscReal best, past = 0, future; 203a40ed3dSBarry Smith 218fa24674SBarry Smith PetscFunctionBegin; 228fa24674SBarry Smith /* pick initial row */ 238fa24674SBarry Smith best = -1; 248fa24674SBarry Smith for (i = 0; i < n; i++) { 2575567043SBarry Smith future = 0.0; 268fa24674SBarry Smith for (j = ai[i]; j < ai[i + 1]; j++) { 27db4deed7SKarl Rupp if (aj[j] != i) future += PetscAbsScalar(aa[j]); 28db4deed7SKarl Rupp else past = PetscAbsScalar(aa[j]); 298fa24674SBarry Smith } 308fa24674SBarry Smith if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 318fa24674SBarry Smith if (past / future > best) { 328fa24674SBarry Smith best = past / future; 338fa24674SBarry Smith current = i; 348fa24674SBarry Smith } 358fa24674SBarry Smith } 368fa24674SBarry Smith 379566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &done)); 389566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(done, n)); 399566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &order)); 408fa24674SBarry Smith order[0] = current; 418fa24674SBarry Smith for (i = 0; i < n - 1; i++) { 428fa24674SBarry Smith done[current] = PETSC_TRUE; 438fa24674SBarry Smith best = -1; 448fa24674SBarry Smith /* loop over all neighbors of current pivot */ 458fa24674SBarry Smith for (j = ai[current]; j < ai[current + 1]; j++) { 468fa24674SBarry Smith jj = aj[j]; 478fa24674SBarry Smith if (done[jj]) continue; 488fa24674SBarry Smith /* loop over columns of potential next row computing weights for below and above diagonal */ 498fa24674SBarry Smith past = future = 0.0; 508fa24674SBarry Smith for (k = ai[jj]; k < ai[jj + 1]; k++) { 518fa24674SBarry Smith kk = aj[k]; 528fa24674SBarry Smith if (done[kk]) past += PetscAbsScalar(aa[k]); 538fa24674SBarry Smith else if (kk != jj) future += PetscAbsScalar(aa[k]); 548fa24674SBarry Smith } 558fa24674SBarry Smith if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 568fa24674SBarry Smith if (past / future > best) { 578fa24674SBarry Smith best = past / future; 588fa24674SBarry Smith newcurrent = jj; 598fa24674SBarry Smith } 608fa24674SBarry Smith } 618fa24674SBarry Smith if (best == -1) { /* no neighbors to select from so select best of all that remain */ 628fa24674SBarry Smith best = -1; 638fa24674SBarry Smith for (k = 0; k < n; k++) { 648fa24674SBarry Smith if (done[k]) continue; 6575567043SBarry Smith future = 0.0; 6675567043SBarry Smith past = 0.0; 678fa24674SBarry Smith for (j = ai[k]; j < ai[k + 1]; j++) { 688fa24674SBarry Smith kk = aj[j]; 698fa24674SBarry Smith if (done[kk]) past += PetscAbsScalar(aa[j]); 708fa24674SBarry Smith else if (kk != k) future += PetscAbsScalar(aa[j]); 718fa24674SBarry Smith } 728fa24674SBarry Smith if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 738fa24674SBarry Smith if (past / future > best) { 748fa24674SBarry Smith best = past / future; 758fa24674SBarry Smith newcurrent = k; 768fa24674SBarry Smith } 778fa24674SBarry Smith } 788fa24674SBarry Smith } 7908401ef6SPierre Jolivet PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current"); 808fa24674SBarry Smith current = newcurrent; 818fa24674SBarry Smith order[i + 1] = current; 828fa24674SBarry Smith } 839566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow)); 848fa24674SBarry Smith *icol = *irow; 859566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)*irow)); 869566063dSJacob Faibussowitsch PetscCall(PetscFree(done)); 879566063dSJacob Faibussowitsch PetscCall(PetscFree(order)); 883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 893b2fbd54SBarry Smith } 90ff6a9541SJacob Faibussowitsch #endif 913b2fbd54SBarry Smith 92d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type) 93d71ae5a4SJacob Faibussowitsch { 944ac6704cSBarry Smith PetscFunctionBegin; 954ac6704cSBarry Smith *type = MATSOLVERPETSC; 963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 974ac6704cSBarry Smith } 984ac6704cSBarry Smith 99d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B) 100d71ae5a4SJacob Faibussowitsch { 101d0f46423SBarry Smith PetscInt n = A->rmap->n; 102b24902e0SBarry Smith 103b24902e0SBarry Smith PetscFunctionBegin; 104891bceaeSHong Zhang #if defined(PETSC_USE_COMPLEX) 105*03e5aca4SStefano Zampini if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 106*03e5aca4SStefano Zampini PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n")); 107*03e5aca4SStefano Zampini *B = NULL; 108*03e5aca4SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS); 109*03e5aca4SStefano Zampini } 110891bceaeSHong Zhang #endif 111*03e5aca4SStefano Zampini 1129566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 1139566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*B, n, n, n, n)); 114599ef60dSHong Zhang if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 1159566063dSJacob Faibussowitsch PetscCall(MatSetType(*B, MATSEQAIJ)); 1162205254eSKarl Rupp 11735233d99SShri Abhyankar (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 11835233d99SShri Abhyankar (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 1192205254eSKarl Rupp 1209566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(*B, A, A)); 1219566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU])); 1229566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU])); 1239566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT])); 124b24902e0SBarry Smith } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 1259566063dSJacob Faibussowitsch PetscCall(MatSetType(*B, MATSEQSBAIJ)); 1269566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL)); 1272205254eSKarl Rupp 12835233d99SShri Abhyankar (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 12935233d99SShri Abhyankar (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 1309566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY])); 1319566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC])); 132e32f2f54SBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported"); 133d5f3da31SBarry Smith (*B)->factortype = ftype; 13400c67f3bSHong Zhang 1359566063dSJacob Faibussowitsch PetscCall(PetscFree((*B)->solvertype)); 1369566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype)); 137f73b0415SBarry Smith (*B)->canuseordering = PETSC_TRUE; 1389566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc)); 1393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 140b24902e0SBarry Smith } 141b24902e0SBarry Smith 142ff6a9541SJacob Faibussowitsch #if 0 143ff6a9541SJacob Faibussowitsch // currently unused 144ff6a9541SJacob Faibussowitsch static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 145d71ae5a4SJacob Faibussowitsch { 146416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 147289bc588SBarry Smith IS isicol; 1485d0c19d7SBarry Smith const PetscInt *r, *ic; 1495d0c19d7SBarry Smith PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j; 1501393bc97SHong Zhang PetscInt *bi, *bj, *ajtmp; 1511393bc97SHong Zhang PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 1529e046878SBarry Smith PetscReal f; 1534875ba38SHong Zhang PetscInt nlnk, *lnk, k, **bi_ptr; 1540298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 1551393bc97SHong Zhang PetscBT lnkbt; 156ece542f9SHong Zhang PetscBool missing; 157289bc588SBarry Smith 1583a40ed3dSBarry Smith PetscFunctionBegin; 15908401ef6SPierre Jolivet PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 1609566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 16128b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 162ece542f9SHong Zhang 1639566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1649566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 1659566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 166289bc588SBarry Smith 167289bc588SBarry Smith /* get new row pointers */ 1689566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bi)); 1691393bc97SHong Zhang bi[0] = 0; 1701393bc97SHong Zhang 1711393bc97SHong Zhang /* bdiag is location of diagonal in factor */ 1729566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 1731393bc97SHong Zhang bdiag[0] = 0; 1741393bc97SHong Zhang 1751393bc97SHong Zhang /* linked list for storing column indices of the active row */ 1761393bc97SHong Zhang nlnk = n + 1; 1779566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 1781393bc97SHong Zhang 1799566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 1801393bc97SHong Zhang 1811393bc97SHong Zhang /* initial FreeSpace size is f*(ai[n]+1) */ 182d3d32019SBarry Smith f = info->fill; 183aa91b3e7SRichard Tran Mills if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 1849566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1851393bc97SHong Zhang current_space = free_space; 186289bc588SBarry Smith 187289bc588SBarry Smith for (i = 0; i < n; i++) { 1881393bc97SHong Zhang /* copy previous fill into linked list */ 189289bc588SBarry Smith nzi = 0; 1901393bc97SHong Zhang nnz = ai[r[i] + 1] - ai[r[i]]; 1911393bc97SHong Zhang ajtmp = aj + ai[r[i]]; 1929566063dSJacob Faibussowitsch PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 1931393bc97SHong Zhang nzi += nlnk; 1941393bc97SHong Zhang 1951393bc97SHong Zhang /* add pivot rows into linked list */ 1961393bc97SHong Zhang row = lnk[n]; 1971393bc97SHong Zhang while (row < i) { 198d1170560SHong Zhang nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 199d1170560SHong Zhang ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 2009566063dSJacob Faibussowitsch PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 2011393bc97SHong Zhang nzi += nlnk; 2021393bc97SHong Zhang row = lnk[row]; 203289bc588SBarry Smith } 2041393bc97SHong Zhang bi[i + 1] = bi[i] + nzi; 2051393bc97SHong Zhang im[i] = nzi; 2061393bc97SHong Zhang 2071393bc97SHong Zhang /* mark bdiag */ 2081393bc97SHong Zhang nzbd = 0; 2091393bc97SHong Zhang nnz = nzi; 2101393bc97SHong Zhang k = lnk[n]; 2111393bc97SHong Zhang while (nnz-- && k < i) { 2121393bc97SHong Zhang nzbd++; 2131393bc97SHong Zhang k = lnk[k]; 2141393bc97SHong Zhang } 2151393bc97SHong Zhang bdiag[i] = bi[i] + nzbd; 2161393bc97SHong Zhang 2171393bc97SHong Zhang /* if free space is not available, make more free space */ 2181393bc97SHong Zhang if (current_space->local_remaining < nzi) { 219f91af8c7SBarry Smith nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */ 2209566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 2211393bc97SHong Zhang reallocs++; 2221393bc97SHong Zhang } 2231393bc97SHong Zhang 2241393bc97SHong Zhang /* copy data into free space, then initialize lnk */ 2259566063dSJacob Faibussowitsch PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 2262205254eSKarl Rupp 2271393bc97SHong Zhang bi_ptr[i] = current_space->array; 2281393bc97SHong Zhang current_space->array += nzi; 2291393bc97SHong Zhang current_space->local_used += nzi; 2301393bc97SHong Zhang current_space->local_remaining -= nzi; 231289bc588SBarry Smith } 2326cf91177SBarry Smith #if defined(PETSC_USE_INFO) 233464e8d44SSatish Balay if (ai[n] != 0) { 2341393bc97SHong Zhang PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 2359566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 2369566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2379566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 2389566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 23905bf559cSSatish Balay } else { 2409566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 24105bf559cSSatish Balay } 24263ba0a88SBarry Smith #endif 2432fb3ed76SBarry Smith 2449566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 2459566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 2461987afe7SBarry Smith 2471393bc97SHong Zhang /* destroy list of free space and other temporary array(s) */ 2489566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 2499566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 2509566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 2519566063dSJacob Faibussowitsch PetscCall(PetscFree2(bi_ptr, im)); 252289bc588SBarry Smith 253289bc588SBarry Smith /* put together the new matrix */ 2549566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 255719d5645SBarry Smith b = (Mat_SeqAIJ *)(B)->data; 2562205254eSKarl Rupp 257e6b907acSBarry Smith b->free_a = PETSC_TRUE; 258e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 2597c922b88SBarry Smith b->singlemalloc = PETSC_FALSE; 2602205254eSKarl Rupp 2619566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[n] + 1, &b->a)); 2621393bc97SHong Zhang b->j = bj; 2631393bc97SHong Zhang b->i = bi; 2641393bc97SHong Zhang b->diag = bdiag; 265f4259b30SLisandro Dalcin b->ilen = NULL; 266f4259b30SLisandro Dalcin b->imax = NULL; 267416022c9SBarry Smith b->row = isrow; 268416022c9SBarry Smith b->col = iscol; 2699566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 2709566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 27182bf6240SBarry Smith b->icol = isicol; 2729566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 2731393bc97SHong Zhang 2741393bc97SHong Zhang /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 2751393bc97SHong Zhang b->maxnz = b->nz = bi[n]; 2767fd98bd6SLois Curfman McInnes 277d5f3da31SBarry Smith (B)->factortype = MAT_FACTOR_LU; 278719d5645SBarry Smith (B)->info.factor_mallocs = reallocs; 279719d5645SBarry Smith (B)->info.fill_ratio_given = f; 280703deb49SSatish Balay 2819f7d0b68SHong Zhang if (ai[n]) { 282719d5645SBarry Smith (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 283eea2913aSSatish Balay } else { 284719d5645SBarry Smith (B)->info.fill_ratio_needed = 0.0; 285eea2913aSSatish Balay } 286ad04f41aSHong Zhang (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 287ad540459SPierre Jolivet if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 289289bc588SBarry Smith } 290ff6a9541SJacob Faibussowitsch #endif 2911393bc97SHong Zhang 292d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 293d71ae5a4SJacob Faibussowitsch { 294ce3d78c0SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 295ce3d78c0SShri Abhyankar IS isicol; 2968758e1faSBarry Smith const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp; 2978758e1faSBarry Smith PetscInt i, n = A->rmap->n; 2988758e1faSBarry Smith PetscInt *bi, *bj; 299ce3d78c0SShri Abhyankar PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 300ce3d78c0SShri Abhyankar PetscReal f; 301ce3d78c0SShri Abhyankar PetscInt nlnk, *lnk, k, **bi_ptr; 3020298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 303ce3d78c0SShri Abhyankar PetscBT lnkbt; 304ece542f9SHong Zhang PetscBool missing; 305ce3d78c0SShri Abhyankar 306ce3d78c0SShri Abhyankar PetscFunctionBegin; 30708401ef6SPierre Jolivet PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 3089566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 30928b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 310ece542f9SHong Zhang 3119566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 3129566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 3139566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 314ce3d78c0SShri Abhyankar 3151bfa06eaSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 3169566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bi)); 3179566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 318a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 3199b48462bSShri Abhyankar 3209b48462bSShri Abhyankar /* linked list for storing column indices of the active row */ 3219b48462bSShri Abhyankar nlnk = n + 1; 3229566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 3239b48462bSShri Abhyankar 3249566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 3259b48462bSShri Abhyankar 3269b48462bSShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 3279b48462bSShri Abhyankar f = info->fill; 328aa91b3e7SRichard Tran Mills if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 3299566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 3309b48462bSShri Abhyankar current_space = free_space; 3319b48462bSShri Abhyankar 3329b48462bSShri Abhyankar for (i = 0; i < n; i++) { 3339b48462bSShri Abhyankar /* copy previous fill into linked list */ 3349b48462bSShri Abhyankar nzi = 0; 3359b48462bSShri Abhyankar nnz = ai[r[i] + 1] - ai[r[i]]; 3369b48462bSShri Abhyankar ajtmp = aj + ai[r[i]]; 3379566063dSJacob Faibussowitsch PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 3389b48462bSShri Abhyankar nzi += nlnk; 3399b48462bSShri Abhyankar 3409b48462bSShri Abhyankar /* add pivot rows into linked list */ 3419b48462bSShri Abhyankar row = lnk[n]; 3429b48462bSShri Abhyankar while (row < i) { 3439b48462bSShri Abhyankar nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 3449b48462bSShri Abhyankar ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 3459566063dSJacob Faibussowitsch PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 3469b48462bSShri Abhyankar nzi += nlnk; 3479b48462bSShri Abhyankar row = lnk[row]; 3489b48462bSShri Abhyankar } 3499b48462bSShri Abhyankar bi[i + 1] = bi[i] + nzi; 3509b48462bSShri Abhyankar im[i] = nzi; 3519b48462bSShri Abhyankar 3529b48462bSShri Abhyankar /* mark bdiag */ 3539b48462bSShri Abhyankar nzbd = 0; 3549b48462bSShri Abhyankar nnz = nzi; 3559b48462bSShri Abhyankar k = lnk[n]; 3569b48462bSShri Abhyankar while (nnz-- && k < i) { 3579b48462bSShri Abhyankar nzbd++; 3589b48462bSShri Abhyankar k = lnk[k]; 3599b48462bSShri Abhyankar } 3609b48462bSShri Abhyankar bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 3619b48462bSShri Abhyankar 3629b48462bSShri Abhyankar /* if free space is not available, make more free space */ 3639b48462bSShri Abhyankar if (current_space->local_remaining < nzi) { 3642f18eb33SBarry Smith /* estimated additional space needed */ 3651df4278eSBarry Smith nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi)); 3669566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 3679b48462bSShri Abhyankar reallocs++; 3689b48462bSShri Abhyankar } 3699b48462bSShri Abhyankar 3709b48462bSShri Abhyankar /* copy data into free space, then initialize lnk */ 3719566063dSJacob Faibussowitsch PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 3722205254eSKarl Rupp 3739b48462bSShri Abhyankar bi_ptr[i] = current_space->array; 3749b48462bSShri Abhyankar current_space->array += nzi; 3759b48462bSShri Abhyankar current_space->local_used += nzi; 3769b48462bSShri Abhyankar current_space->local_remaining -= nzi; 3779b48462bSShri Abhyankar } 3789b48462bSShri Abhyankar 3799566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 3809566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 3819b48462bSShri Abhyankar 3829263d837SHong Zhang /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 3839566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 3849566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 3859566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 3869566063dSJacob Faibussowitsch PetscCall(PetscFree2(bi_ptr, im)); 3879b48462bSShri Abhyankar 3889b48462bSShri Abhyankar /* put together the new matrix */ 3899566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 3909b48462bSShri Abhyankar b = (Mat_SeqAIJ *)(B)->data; 3912205254eSKarl Rupp 3929b48462bSShri Abhyankar b->free_a = PETSC_TRUE; 3939b48462bSShri Abhyankar b->free_ij = PETSC_TRUE; 3949b48462bSShri Abhyankar b->singlemalloc = PETSC_FALSE; 3952205254eSKarl Rupp 3969566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a)); 3972205254eSKarl Rupp 3989b48462bSShri Abhyankar b->j = bj; 3999b48462bSShri Abhyankar b->i = bi; 4009b48462bSShri Abhyankar b->diag = bdiag; 401f4259b30SLisandro Dalcin b->ilen = NULL; 402f4259b30SLisandro Dalcin b->imax = NULL; 4039b48462bSShri Abhyankar b->row = isrow; 4049b48462bSShri Abhyankar b->col = iscol; 4059566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 4069566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 4079b48462bSShri Abhyankar b->icol = isicol; 4089566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 4099b48462bSShri Abhyankar 4109b48462bSShri Abhyankar /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 4119b48462bSShri Abhyankar b->maxnz = b->nz = bdiag[0] + 1; 4122205254eSKarl Rupp 413d5f3da31SBarry Smith B->factortype = MAT_FACTOR_LU; 414f284f12aSHong Zhang B->info.factor_mallocs = reallocs; 415f284f12aSHong Zhang B->info.fill_ratio_given = f; 4169b48462bSShri Abhyankar 4179b48462bSShri Abhyankar if (ai[n]) { 418f284f12aSHong Zhang B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 4199b48462bSShri Abhyankar } else { 420f284f12aSHong Zhang B->info.fill_ratio_needed = 0.0; 4219b48462bSShri Abhyankar } 4229263d837SHong Zhang #if defined(PETSC_USE_INFO) 4239263d837SHong Zhang if (ai[n] != 0) { 4249263d837SHong Zhang PetscReal af = B->info.fill_ratio_needed; 4259566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 4269566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 4279566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 4289566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 4299263d837SHong Zhang } else { 4309566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 4319263d837SHong Zhang } 4329263d837SHong Zhang #endif 43335233d99SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 434ad540459SPierre Jolivet if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 4359566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCheckInode_FactorLU(B)); 4363ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4379b48462bSShri Abhyankar } 4389b48462bSShri Abhyankar 4395b5bc046SBarry Smith /* 4405b5bc046SBarry Smith Trouble in factorization, should we dump the original matrix? 4415b5bc046SBarry Smith */ 442d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorDumpMatrix(Mat A) 443d71ae5a4SJacob Faibussowitsch { 444ace3abfcSBarry Smith PetscBool flg = PETSC_FALSE; 4455b5bc046SBarry Smith 4465b5bc046SBarry Smith PetscFunctionBegin; 4479566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL)); 4485b5bc046SBarry Smith if (flg) { 4495b5bc046SBarry Smith PetscViewer viewer; 4505b5bc046SBarry Smith char filename[PETSC_MAX_PATH_LEN]; 4515b5bc046SBarry Smith 4529566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank)); 4539566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer)); 4549566063dSJacob Faibussowitsch PetscCall(MatView(A, viewer)); 4559566063dSJacob Faibussowitsch PetscCall(PetscViewerDestroy(&viewer)); 4565b5bc046SBarry Smith } 4573ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4585b5bc046SBarry Smith } 4595b5bc046SBarry Smith 460d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 461d71ae5a4SJacob Faibussowitsch { 462c9c72f8fSHong Zhang Mat C = B; 463c9c72f8fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 464c9c72f8fSHong Zhang IS isrow = b->row, isicol = b->icol; 465c9c72f8fSHong Zhang const PetscInt *r, *ic, *ics; 466bbd65245SShri Abhyankar const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag; 467bbd65245SShri Abhyankar PetscInt i, j, k, nz, nzL, row, *pj; 468bbd65245SShri Abhyankar const PetscInt *ajtmp, *bjtmp; 469bbd65245SShri Abhyankar MatScalar *rtmp, *pc, multiplier, *pv; 470bbd65245SShri Abhyankar const MatScalar *aa = a->a, *v; 471ace3abfcSBarry Smith PetscBool row_identity, col_identity; 472d90ac83dSHong Zhang FactorShiftCtx sctx; 4734f81c4b7SBarry Smith const PetscInt *ddiag; 474d4ad06f7SHong Zhang PetscReal rs; 475d4ad06f7SHong Zhang MatScalar d; 476d4ad06f7SHong Zhang 477c9c72f8fSHong Zhang PetscFunctionBegin; 478d6e5152cSHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 4799566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 480d4ad06f7SHong Zhang 481f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 482d4ad06f7SHong Zhang ddiag = a->diag; 483d4ad06f7SHong Zhang sctx.shift_top = info->zeropivot; 484d4ad06f7SHong Zhang for (i = 0; i < n; i++) { 485d4ad06f7SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 486d4ad06f7SHong Zhang d = (aa)[ddiag[i]]; 487d4ad06f7SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 488d4ad06f7SHong Zhang v = aa + ai[i]; 489d4ad06f7SHong Zhang nz = ai[i + 1] - ai[i]; 4902205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 491d4ad06f7SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 492d4ad06f7SHong Zhang } 493d4ad06f7SHong Zhang sctx.shift_top *= 1.1; 494d4ad06f7SHong Zhang sctx.nshift_max = 5; 495d4ad06f7SHong Zhang sctx.shift_lo = 0.; 496d4ad06f7SHong Zhang sctx.shift_hi = 1.; 497d4ad06f7SHong Zhang } 498d4ad06f7SHong Zhang 4999566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 5009566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 5019566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp)); 502c9c72f8fSHong Zhang ics = ic; 503c9c72f8fSHong Zhang 504d4ad06f7SHong Zhang do { 50507b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 506c9c72f8fSHong Zhang for (i = 0; i < n; i++) { 507c9c72f8fSHong Zhang /* zero rtmp */ 508c9c72f8fSHong Zhang /* L part */ 509c9c72f8fSHong Zhang nz = bi[i + 1] - bi[i]; 510c9c72f8fSHong Zhang bjtmp = bj + bi[i]; 511c9c72f8fSHong Zhang for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 512c9c72f8fSHong Zhang 513c9c72f8fSHong Zhang /* U part */ 51462fbd6afSShri Abhyankar nz = bdiag[i] - bdiag[i + 1]; 51562fbd6afSShri Abhyankar bjtmp = bj + bdiag[i + 1] + 1; 516813a1f2bSShri Abhyankar for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 517813a1f2bSShri Abhyankar 518813a1f2bSShri Abhyankar /* load in initial (unfactored row) */ 519813a1f2bSShri Abhyankar nz = ai[r[i] + 1] - ai[r[i]]; 520813a1f2bSShri Abhyankar ajtmp = aj + ai[r[i]]; 521813a1f2bSShri Abhyankar v = aa + ai[r[i]]; 522ad540459SPierre Jolivet for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 523813a1f2bSShri Abhyankar /* ZeropivotApply() */ 52498a93161SHong Zhang rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 525813a1f2bSShri Abhyankar 526813a1f2bSShri Abhyankar /* elimination */ 527813a1f2bSShri Abhyankar bjtmp = bj + bi[i]; 528813a1f2bSShri Abhyankar row = *bjtmp++; 529813a1f2bSShri Abhyankar nzL = bi[i + 1] - bi[i]; 530f268cba8SShri Abhyankar for (k = 0; k < nzL; k++) { 531813a1f2bSShri Abhyankar pc = rtmp + row; 532813a1f2bSShri Abhyankar if (*pc != 0.0) { 533813a1f2bSShri Abhyankar pv = b->a + bdiag[row]; 534813a1f2bSShri Abhyankar multiplier = *pc * (*pv); 535813a1f2bSShri Abhyankar *pc = multiplier; 5362205254eSKarl Rupp 53762fbd6afSShri Abhyankar pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 53862fbd6afSShri Abhyankar pv = b->a + bdiag[row + 1] + 1; 53962fbd6afSShri Abhyankar nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */ 5402205254eSKarl Rupp 541813a1f2bSShri Abhyankar for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 5429566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 543813a1f2bSShri Abhyankar } 544f268cba8SShri Abhyankar row = *bjtmp++; 545813a1f2bSShri Abhyankar } 546813a1f2bSShri Abhyankar 547813a1f2bSShri Abhyankar /* finished row so stick it into b->a */ 548813a1f2bSShri Abhyankar rs = 0.0; 549813a1f2bSShri Abhyankar /* L part */ 550813a1f2bSShri Abhyankar pv = b->a + bi[i]; 551813a1f2bSShri Abhyankar pj = b->j + bi[i]; 552813a1f2bSShri Abhyankar nz = bi[i + 1] - bi[i]; 553813a1f2bSShri Abhyankar for (j = 0; j < nz; j++) { 5549371c9d4SSatish Balay pv[j] = rtmp[pj[j]]; 5559371c9d4SSatish Balay rs += PetscAbsScalar(pv[j]); 556813a1f2bSShri Abhyankar } 557813a1f2bSShri Abhyankar 558813a1f2bSShri Abhyankar /* U part */ 55962fbd6afSShri Abhyankar pv = b->a + bdiag[i + 1] + 1; 56062fbd6afSShri Abhyankar pj = b->j + bdiag[i + 1] + 1; 56162fbd6afSShri Abhyankar nz = bdiag[i] - bdiag[i + 1] - 1; 562813a1f2bSShri Abhyankar for (j = 0; j < nz; j++) { 5639371c9d4SSatish Balay pv[j] = rtmp[pj[j]]; 5649371c9d4SSatish Balay rs += PetscAbsScalar(pv[j]); 565813a1f2bSShri Abhyankar } 566813a1f2bSShri Abhyankar 567813a1f2bSShri Abhyankar sctx.rs = rs; 568813a1f2bSShri Abhyankar sctx.pv = rtmp[i]; 5699566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 57007b50cabSHong Zhang if (sctx.newshift) break; /* break for-loop */ 57107b50cabSHong Zhang rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 572813a1f2bSShri Abhyankar 573a5b23f4aSJose E. Roman /* Mark diagonal and invert diagonal for simpler triangular solves */ 574813a1f2bSShri Abhyankar pv = b->a + bdiag[i]; 575813a1f2bSShri Abhyankar *pv = 1.0 / rtmp[i]; 576813a1f2bSShri Abhyankar 577813a1f2bSShri Abhyankar } /* endof for (i=0; i<n; i++) { */ 578813a1f2bSShri Abhyankar 5798ff23777SHong Zhang /* MatPivotRefine() */ 58007b50cabSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 581813a1f2bSShri Abhyankar /* 582813a1f2bSShri Abhyankar * if no shift in this attempt & shifting & started shifting & can refine, 583813a1f2bSShri Abhyankar * then try lower shift 584813a1f2bSShri Abhyankar */ 585813a1f2bSShri Abhyankar sctx.shift_hi = sctx.shift_fraction; 586813a1f2bSShri Abhyankar sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 587813a1f2bSShri Abhyankar sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 58807b50cabSHong Zhang sctx.newshift = PETSC_TRUE; 589813a1f2bSShri Abhyankar sctx.nshift++; 590813a1f2bSShri Abhyankar } 59107b50cabSHong Zhang } while (sctx.newshift); 592813a1f2bSShri Abhyankar 5939566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 5949566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 5959566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 596d3ac4fa3SBarry Smith 5979566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 5989566063dSJacob Faibussowitsch PetscCall(ISIdentity(isicol, &col_identity)); 599abb87a52SBarry Smith if (b->inode.size) { 600abb87a52SBarry Smith C->ops->solve = MatSolve_SeqAIJ_Inode; 601abb87a52SBarry Smith } else if (row_identity && col_identity) { 60235233d99SShri Abhyankar C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 603813a1f2bSShri Abhyankar } else { 60435233d99SShri Abhyankar C->ops->solve = MatSolve_SeqAIJ; 605813a1f2bSShri Abhyankar } 60635233d99SShri Abhyankar C->ops->solveadd = MatSolveAdd_SeqAIJ; 60735233d99SShri Abhyankar C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 60835233d99SShri Abhyankar C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 60935233d99SShri Abhyankar C->ops->matsolve = MatMatSolve_SeqAIJ; 610813a1f2bSShri Abhyankar C->assembled = PETSC_TRUE; 611813a1f2bSShri Abhyankar C->preallocated = PETSC_TRUE; 6122205254eSKarl Rupp 6139566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->cmap->n)); 614813a1f2bSShri Abhyankar 61514d2772eSHong Zhang /* MatShiftView(A,info,&sctx) */ 616813a1f2bSShri Abhyankar if (sctx.nshift) { 617f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 6189566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 619f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 6209566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 621f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 6229566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 623813a1f2bSShri Abhyankar } 624813a1f2bSShri Abhyankar } 6253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 626813a1f2bSShri Abhyankar } 627813a1f2bSShri Abhyankar 628ff6a9541SJacob Faibussowitsch static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat); 629ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec); 630ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec); 631ff6a9541SJacob Faibussowitsch 632d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 633d71ae5a4SJacob Faibussowitsch { 634719d5645SBarry Smith Mat C = B; 635416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 63682bf6240SBarry Smith IS isrow = b->row, isicol = b->icol; 6375d0c19d7SBarry Smith const PetscInt *r, *ic, *ics; 638d278edc7SBarry Smith PetscInt nz, row, i, j, n = A->rmap->n, diag; 639d278edc7SBarry Smith const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 640d278edc7SBarry Smith const PetscInt *ajtmp, *bjtmp, *diag_offset = b->diag, *pj; 641d278edc7SBarry Smith MatScalar *pv, *rtmp, *pc, multiplier, d; 642d278edc7SBarry Smith const MatScalar *v, *aa = a->a; 64333f9893dSHong Zhang PetscReal rs = 0.0; 644d90ac83dSHong Zhang FactorShiftCtx sctx; 6454f81c4b7SBarry Smith const PetscInt *ddiag; 646ace3abfcSBarry Smith PetscBool row_identity, col_identity; 647289bc588SBarry Smith 6483a40ed3dSBarry Smith PetscFunctionBegin; 649504a3fadSHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 6509566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 651ada7143aSSatish Balay 652f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 65342898933SHong Zhang ddiag = a->diag; 6549f7d0b68SHong Zhang sctx.shift_top = info->zeropivot; 6556cc28720Svictorle for (i = 0; i < n; i++) { 6569f95998fSHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 6579f7d0b68SHong Zhang d = (aa)[ddiag[i]]; 6581a890ab1SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 6599f7d0b68SHong Zhang v = aa + ai[i]; 6609f7d0b68SHong Zhang nz = ai[i + 1] - ai[i]; 6612205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 6621a890ab1SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 6636cc28720Svictorle } 664118f5789SBarry Smith sctx.shift_top *= 1.1; 665d2276718SHong Zhang sctx.nshift_max = 5; 666d2276718SHong Zhang sctx.shift_lo = 0.; 667d2276718SHong Zhang sctx.shift_hi = 1.; 668a0a356efSHong Zhang } 669a0a356efSHong Zhang 6709566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 6719566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 6729566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp)); 673504a3fadSHong Zhang ics = ic; 674504a3fadSHong Zhang 675085256b3SBarry Smith do { 67607b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 677289bc588SBarry Smith for (i = 0; i < n; i++) { 678b3bf805bSHong Zhang nz = bi[i + 1] - bi[i]; 679b3bf805bSHong Zhang bjtmp = bj + bi[i]; 6809f7d0b68SHong Zhang for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 681289bc588SBarry Smith 682289bc588SBarry Smith /* load in initial (unfactored row) */ 6839f7d0b68SHong Zhang nz = ai[r[i] + 1] - ai[r[i]]; 6849f7d0b68SHong Zhang ajtmp = aj + ai[r[i]]; 6859f7d0b68SHong Zhang v = aa + ai[r[i]]; 686ad540459SPierre Jolivet for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 687d2276718SHong Zhang rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 688289bc588SBarry Smith 689b3bf805bSHong Zhang row = *bjtmp++; 690f2582111SSatish Balay while (row < i) { 6918c37ef55SBarry Smith pc = rtmp + row; 6928c37ef55SBarry Smith if (*pc != 0.0) { 693010a20caSHong Zhang pv = b->a + diag_offset[row]; 694010a20caSHong Zhang pj = b->j + diag_offset[row] + 1; 69535aab85fSBarry Smith multiplier = *pc / *pv++; 6968c37ef55SBarry Smith *pc = multiplier; 697b3bf805bSHong Zhang nz = bi[row + 1] - diag_offset[row] - 1; 6989f7d0b68SHong Zhang for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 6999566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 700289bc588SBarry Smith } 701b3bf805bSHong Zhang row = *bjtmp++; 702289bc588SBarry Smith } 703416022c9SBarry Smith /* finished row so stick it into b->a */ 704b3bf805bSHong Zhang pv = b->a + bi[i]; 705b3bf805bSHong Zhang pj = b->j + bi[i]; 706b3bf805bSHong Zhang nz = bi[i + 1] - bi[i]; 707b3bf805bSHong Zhang diag = diag_offset[i] - bi[i]; 708e57ff13aSBarry Smith rs = 0.0; 709d3d32019SBarry Smith for (j = 0; j < nz; j++) { 7109f7d0b68SHong Zhang pv[j] = rtmp[pj[j]]; 7119f7d0b68SHong Zhang rs += PetscAbsScalar(pv[j]); 712d3d32019SBarry Smith } 713e57ff13aSBarry Smith rs -= PetscAbsScalar(pv[diag]); 714d2276718SHong Zhang 715d2276718SHong Zhang sctx.rs = rs; 716d2276718SHong Zhang sctx.pv = pv[diag]; 7179566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 71807b50cabSHong Zhang if (sctx.newshift) break; 719504a3fadSHong Zhang pv[diag] = sctx.pv; 72035aab85fSBarry Smith } 721d2276718SHong Zhang 72207b50cabSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 7236cc28720Svictorle /* 7246c037d1bSvictorle * if no shift in this attempt & shifting & started shifting & can refine, 7256cc28720Svictorle * then try lower shift 7266cc28720Svictorle */ 7270481f469SBarry Smith sctx.shift_hi = sctx.shift_fraction; 7280481f469SBarry Smith sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 7290481f469SBarry Smith sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 73007b50cabSHong Zhang sctx.newshift = PETSC_TRUE; 731d2276718SHong Zhang sctx.nshift++; 7326cc28720Svictorle } 73307b50cabSHong Zhang } while (sctx.newshift); 7340f11f9aeSSatish Balay 735a5b23f4aSJose E. Roman /* invert diagonal entries for simpler triangular solves */ 736ad540459SPierre Jolivet for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]]; 7379566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 7389566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 7399566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 740d3ac4fa3SBarry Smith 7419566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 7429566063dSJacob Faibussowitsch PetscCall(ISIdentity(isicol, &col_identity)); 7438d8c7c43SBarry Smith if (row_identity && col_identity) { 744ad04f41aSHong Zhang C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 7458d8c7c43SBarry Smith } else { 746ad04f41aSHong Zhang C->ops->solve = MatSolve_SeqAIJ_inplace; 747db4efbfdSBarry Smith } 748ad04f41aSHong Zhang C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 749ad04f41aSHong Zhang C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 750ad04f41aSHong Zhang C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 751ad04f41aSHong Zhang C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 7522205254eSKarl Rupp 753c456f294SBarry Smith C->assembled = PETSC_TRUE; 7545c9eb25fSBarry Smith C->preallocated = PETSC_TRUE; 7552205254eSKarl Rupp 7569566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->cmap->n)); 757d2276718SHong Zhang if (sctx.nshift) { 758f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 7599566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 760f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 7619566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 7626cc28720Svictorle } 7630697b6caSHong Zhang } 764d3ac4fa3SBarry Smith (C)->ops->solve = MatSolve_SeqAIJ_inplace; 765d3ac4fa3SBarry Smith (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 7662205254eSKarl Rupp 7679566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCheckInode(C)); 7683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 769289bc588SBarry Smith } 7700889b5dcSvictorle 771ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec); 772ff6a9541SJacob Faibussowitsch 773137fb511SHong Zhang /* 774137fb511SHong Zhang This routine implements inplace ILU(0) with row or/and column permutations. 775137fb511SHong Zhang Input: 776137fb511SHong Zhang A - original matrix 777137fb511SHong Zhang Output; 778137fb511SHong Zhang A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 779137fb511SHong Zhang a->j (col index) is permuted by the inverse of colperm, then sorted 780137fb511SHong Zhang a->a reordered accordingly with a->j 781137fb511SHong Zhang a->diag (ptr to diagonal elements) is updated. 782137fb511SHong Zhang */ 783d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info) 784d71ae5a4SJacob Faibussowitsch { 785b051339dSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 786b051339dSHong Zhang IS isrow = a->row, isicol = a->icol; 7875d0c19d7SBarry Smith const PetscInt *r, *ic, *ics; 7885d0c19d7SBarry Smith PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j; 7895d0c19d7SBarry Smith PetscInt *ajtmp, nz, row; 790b051339dSHong Zhang PetscInt *diag = a->diag, nbdiag, *pj; 791a77337e4SBarry Smith PetscScalar *rtmp, *pc, multiplier, d; 792504a3fadSHong Zhang MatScalar *pv, *v; 793137fb511SHong Zhang PetscReal rs; 794d90ac83dSHong Zhang FactorShiftCtx sctx; 795504a3fadSHong Zhang const MatScalar *aa = a->a, *vtmp; 796137fb511SHong Zhang 797137fb511SHong Zhang PetscFunctionBegin; 79808401ef6SPierre Jolivet PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address"); 799504a3fadSHong Zhang 800504a3fadSHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 8019566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 802504a3fadSHong Zhang 803504a3fadSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 804504a3fadSHong Zhang const PetscInt *ddiag = a->diag; 805504a3fadSHong Zhang sctx.shift_top = info->zeropivot; 806504a3fadSHong Zhang for (i = 0; i < n; i++) { 807504a3fadSHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 808504a3fadSHong Zhang d = (aa)[ddiag[i]]; 809504a3fadSHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 810504a3fadSHong Zhang vtmp = aa + ai[i]; 811504a3fadSHong Zhang nz = ai[i + 1] - ai[i]; 8122205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]); 813504a3fadSHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 814504a3fadSHong Zhang } 815504a3fadSHong Zhang sctx.shift_top *= 1.1; 816504a3fadSHong Zhang sctx.nshift_max = 5; 817504a3fadSHong Zhang sctx.shift_lo = 0.; 818504a3fadSHong Zhang sctx.shift_hi = 1.; 819504a3fadSHong Zhang } 820504a3fadSHong Zhang 8219566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 8229566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 8239566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp)); 8249566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(rtmp, n + 1)); 825b051339dSHong Zhang ics = ic; 826137fb511SHong Zhang 827504a3fadSHong Zhang #if defined(MV) 82875567043SBarry Smith sctx.shift_top = 0.; 829e60cf9a0SBarry Smith sctx.nshift_max = 0; 83075567043SBarry Smith sctx.shift_lo = 0.; 83175567043SBarry Smith sctx.shift_hi = 0.; 83275567043SBarry Smith sctx.shift_fraction = 0.; 833137fb511SHong Zhang 834f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 83575567043SBarry Smith sctx.shift_top = 0.; 836137fb511SHong Zhang for (i = 0; i < n; i++) { 837137fb511SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 838b051339dSHong Zhang d = (a->a)[diag[i]]; 839137fb511SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 840b051339dSHong Zhang v = a->a + ai[i]; 841b051339dSHong Zhang nz = ai[i + 1] - ai[i]; 8422205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 843137fb511SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 844137fb511SHong Zhang } 845137fb511SHong Zhang if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 846137fb511SHong Zhang sctx.shift_top *= 1.1; 847137fb511SHong Zhang sctx.nshift_max = 5; 848137fb511SHong Zhang sctx.shift_lo = 0.; 849137fb511SHong Zhang sctx.shift_hi = 1.; 850137fb511SHong Zhang } 851137fb511SHong Zhang 85275567043SBarry Smith sctx.shift_amount = 0.; 853137fb511SHong Zhang sctx.nshift = 0; 854504a3fadSHong Zhang #endif 855504a3fadSHong Zhang 856137fb511SHong Zhang do { 85707b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 858137fb511SHong Zhang for (i = 0; i < n; i++) { 859b051339dSHong Zhang /* load in initial unfactored row */ 860b051339dSHong Zhang nz = ai[r[i] + 1] - ai[r[i]]; 861b051339dSHong Zhang ajtmp = aj + ai[r[i]]; 862b051339dSHong Zhang v = a->a + ai[r[i]]; 863137fb511SHong Zhang /* sort permuted ajtmp and values v accordingly */ 864b051339dSHong Zhang for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]]; 8659566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v)); 866137fb511SHong Zhang 867b051339dSHong Zhang diag[r[i]] = ai[r[i]]; 868137fb511SHong Zhang for (j = 0; j < nz; j++) { 869137fb511SHong Zhang rtmp[ajtmp[j]] = v[j]; 870b051339dSHong Zhang if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 871137fb511SHong Zhang } 872137fb511SHong Zhang rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 873137fb511SHong Zhang 874b051339dSHong Zhang row = *ajtmp++; 875137fb511SHong Zhang while (row < i) { 876137fb511SHong Zhang pc = rtmp + row; 877137fb511SHong Zhang if (*pc != 0.0) { 878b051339dSHong Zhang pv = a->a + diag[r[row]]; 879b051339dSHong Zhang pj = aj + diag[r[row]] + 1; 880137fb511SHong Zhang 881137fb511SHong Zhang multiplier = *pc / *pv++; 882137fb511SHong Zhang *pc = multiplier; 883b051339dSHong Zhang nz = ai[r[row] + 1] - diag[r[row]] - 1; 884b051339dSHong Zhang for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 8859566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 886137fb511SHong Zhang } 887b051339dSHong Zhang row = *ajtmp++; 888137fb511SHong Zhang } 889b051339dSHong Zhang /* finished row so overwrite it onto a->a */ 890b051339dSHong Zhang pv = a->a + ai[r[i]]; 891b051339dSHong Zhang pj = aj + ai[r[i]]; 892b051339dSHong Zhang nz = ai[r[i] + 1] - ai[r[i]]; 893b051339dSHong Zhang nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 894137fb511SHong Zhang 895137fb511SHong Zhang rs = 0.0; 896137fb511SHong Zhang for (j = 0; j < nz; j++) { 897b051339dSHong Zhang pv[j] = rtmp[pj[j]]; 898b051339dSHong Zhang if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 899137fb511SHong Zhang } 900137fb511SHong Zhang 901137fb511SHong Zhang sctx.rs = rs; 902b051339dSHong Zhang sctx.pv = pv[nbdiag]; 9039566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 90407b50cabSHong Zhang if (sctx.newshift) break; 905504a3fadSHong Zhang pv[nbdiag] = sctx.pv; 906137fb511SHong Zhang } 907137fb511SHong Zhang 90807b50cabSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 909137fb511SHong Zhang /* 910137fb511SHong Zhang * if no shift in this attempt & shifting & started shifting & can refine, 911137fb511SHong Zhang * then try lower shift 912137fb511SHong Zhang */ 9130481f469SBarry Smith sctx.shift_hi = sctx.shift_fraction; 9140481f469SBarry Smith sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 9150481f469SBarry Smith sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 91607b50cabSHong Zhang sctx.newshift = PETSC_TRUE; 917137fb511SHong Zhang sctx.nshift++; 918137fb511SHong Zhang } 91907b50cabSHong Zhang } while (sctx.newshift); 920137fb511SHong Zhang 921a5b23f4aSJose E. Roman /* invert diagonal entries for simpler triangular solves */ 922ad540459SPierre Jolivet for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]]; 923137fb511SHong Zhang 9249566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 9259566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 9269566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 9272205254eSKarl Rupp 928b051339dSHong Zhang A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 929ad04f41aSHong Zhang A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 930ad04f41aSHong Zhang A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 931ad04f41aSHong Zhang A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 9322205254eSKarl Rupp 933b051339dSHong Zhang A->assembled = PETSC_TRUE; 9345c9eb25fSBarry Smith A->preallocated = PETSC_TRUE; 9352205254eSKarl Rupp 9369566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(A->cmap->n)); 937137fb511SHong Zhang if (sctx.nshift) { 938f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 9399566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 940f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 9419566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 942137fb511SHong Zhang } 943137fb511SHong Zhang } 9443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 945137fb511SHong Zhang } 946137fb511SHong Zhang 947d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info) 948d71ae5a4SJacob Faibussowitsch { 949416022c9SBarry Smith Mat C; 950416022c9SBarry Smith 9513a40ed3dSBarry Smith PetscFunctionBegin; 9529566063dSJacob Faibussowitsch PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C)); 9539566063dSJacob Faibussowitsch PetscCall(MatLUFactorSymbolic(C, A, row, col, info)); 9549566063dSJacob Faibussowitsch PetscCall(MatLUFactorNumeric(C, A, info)); 9552205254eSKarl Rupp 956db4efbfdSBarry Smith A->ops->solve = C->ops->solve; 957db4efbfdSBarry Smith A->ops->solvetranspose = C->ops->solvetranspose; 9582205254eSKarl Rupp 9599566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(A, &C)); 9603ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 961da3a660dSBarry Smith } 9621d098868SHong Zhang 963d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 964d71ae5a4SJacob Faibussowitsch { 965416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 966416022c9SBarry Smith IS iscol = a->col, isrow = a->row; 9675d0c19d7SBarry Smith PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 9685d0c19d7SBarry Smith PetscInt nz; 9695d0c19d7SBarry Smith const PetscInt *rout, *cout, *r, *c; 970dd6ea824SBarry Smith PetscScalar *x, *tmp, *tmps, sum; 971d9fead3dSBarry Smith const PetscScalar *b; 972dd6ea824SBarry Smith const MatScalar *aa = a->a, *v; 9738c37ef55SBarry Smith 9743a40ed3dSBarry Smith PetscFunctionBegin; 9753ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 97691bf9adeSBarry Smith 9779566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 9789566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 979416022c9SBarry Smith tmp = a->solve_work; 9808c37ef55SBarry Smith 9819371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 9829371c9d4SSatish Balay r = rout; 9839371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 9849371c9d4SSatish Balay c = cout + (n - 1); 9858c37ef55SBarry Smith 9868c37ef55SBarry Smith /* forward solve the lower triangular */ 9878c37ef55SBarry Smith tmp[0] = b[*r++]; 988010a20caSHong Zhang tmps = tmp; 9898c37ef55SBarry Smith for (i = 1; i < n; i++) { 990010a20caSHong Zhang v = aa + ai[i]; 991010a20caSHong Zhang vi = aj + ai[i]; 99253e7425aSBarry Smith nz = a->diag[i] - ai[i]; 99353e7425aSBarry Smith sum = b[*r++]; 994003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 9958c37ef55SBarry Smith tmp[i] = sum; 9968c37ef55SBarry Smith } 9978c37ef55SBarry Smith 9988c37ef55SBarry Smith /* backward solve the upper triangular */ 9998c37ef55SBarry Smith for (i = n - 1; i >= 0; i--) { 1000010a20caSHong Zhang v = aa + a->diag[i] + 1; 1001010a20caSHong Zhang vi = aj + a->diag[i] + 1; 1002416022c9SBarry Smith nz = ai[i + 1] - a->diag[i] - 1; 10038c37ef55SBarry Smith sum = tmp[i]; 1004003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1005010a20caSHong Zhang x[*c--] = tmp[i] = sum * aa[a->diag[i]]; 10068c37ef55SBarry Smith } 10078c37ef55SBarry Smith 10089566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 10099566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 10109566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 10119566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 10129566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 10133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 10148c37ef55SBarry Smith } 1015026e39d0SSatish Balay 1016ff6a9541SJacob Faibussowitsch static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X) 1017d71ae5a4SJacob Faibussowitsch { 10182bebee5dSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 10192bebee5dSHong Zhang IS iscol = a->col, isrow = a->row; 10205d0c19d7SBarry Smith PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 1021910cf402Sprj- PetscInt nz, neq, ldb, ldx; 10225d0c19d7SBarry Smith const PetscInt *rout, *cout, *r, *c; 1023910cf402Sprj- PetscScalar *x, *tmp = a->solve_work, *tmps, sum; 1024910cf402Sprj- const PetscScalar *b, *aa = a->a, *v; 1025910cf402Sprj- PetscBool isdense; 10262bebee5dSHong Zhang 10272bebee5dSHong Zhang PetscFunctionBegin; 10283ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 10299566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 103028b400f6SJacob Faibussowitsch PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1031f9fb9879SHong Zhang if (X != B) { 10329566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 103328b400f6SJacob Faibussowitsch PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1034f9fb9879SHong Zhang } 10359566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &b)); 10369566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(B, &ldb)); 10379566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &x)); 10389566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(X, &ldx)); 10399371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 10409371c9d4SSatish Balay r = rout; 10419371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 10429371c9d4SSatish Balay c = cout; 1043a36b22ccSSatish Balay for (neq = 0; neq < B->cmap->n; neq++) { 10442bebee5dSHong Zhang /* forward solve the lower triangular */ 10452bebee5dSHong Zhang tmp[0] = b[r[0]]; 10462bebee5dSHong Zhang tmps = tmp; 10472bebee5dSHong Zhang for (i = 1; i < n; i++) { 10482bebee5dSHong Zhang v = aa + ai[i]; 10492bebee5dSHong Zhang vi = aj + ai[i]; 10502bebee5dSHong Zhang nz = a->diag[i] - ai[i]; 10512bebee5dSHong Zhang sum = b[r[i]]; 1052003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 10532bebee5dSHong Zhang tmp[i] = sum; 10542bebee5dSHong Zhang } 10552bebee5dSHong Zhang /* backward solve the upper triangular */ 10562bebee5dSHong Zhang for (i = n - 1; i >= 0; i--) { 10572bebee5dSHong Zhang v = aa + a->diag[i] + 1; 10582bebee5dSHong Zhang vi = aj + a->diag[i] + 1; 10592bebee5dSHong Zhang nz = ai[i + 1] - a->diag[i] - 1; 10602bebee5dSHong Zhang sum = tmp[i]; 1061003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 10622bebee5dSHong Zhang x[c[i]] = tmp[i] = sum * aa[a->diag[i]]; 10632bebee5dSHong Zhang } 1064910cf402Sprj- b += ldb; 1065910cf402Sprj- x += ldx; 10662bebee5dSHong Zhang } 10679566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 10689566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 10699566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &b)); 10709566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &x)); 10719566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 10723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 10732bebee5dSHong Zhang } 10742bebee5dSHong Zhang 1075d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X) 1076d71ae5a4SJacob Faibussowitsch { 10779bd0847aSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 10789bd0847aSShri Abhyankar IS iscol = a->col, isrow = a->row; 10799bd0847aSShri Abhyankar PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 1080910cf402Sprj- PetscInt nz, neq, ldb, ldx; 10819bd0847aSShri Abhyankar const PetscInt *rout, *cout, *r, *c; 1082910cf402Sprj- PetscScalar *x, *tmp = a->solve_work, sum; 1083910cf402Sprj- const PetscScalar *b, *aa = a->a, *v; 1084910cf402Sprj- PetscBool isdense; 10859bd0847aSShri Abhyankar 10869bd0847aSShri Abhyankar PetscFunctionBegin; 10873ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 10889566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 108928b400f6SJacob Faibussowitsch PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1090f9fb9879SHong Zhang if (X != B) { 10919566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 109228b400f6SJacob Faibussowitsch PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1093f9fb9879SHong Zhang } 10949566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &b)); 10959566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(B, &ldb)); 10969566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &x)); 10979566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(X, &ldx)); 10989371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 10999371c9d4SSatish Balay r = rout; 11009371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 11019371c9d4SSatish Balay c = cout; 11029bd0847aSShri Abhyankar for (neq = 0; neq < B->cmap->n; neq++) { 11039bd0847aSShri Abhyankar /* forward solve the lower triangular */ 11049bd0847aSShri Abhyankar tmp[0] = b[r[0]]; 11059bd0847aSShri Abhyankar v = aa; 11069bd0847aSShri Abhyankar vi = aj; 11079bd0847aSShri Abhyankar for (i = 1; i < n; i++) { 11089bd0847aSShri Abhyankar nz = ai[i + 1] - ai[i]; 11099bd0847aSShri Abhyankar sum = b[r[i]]; 11109bd0847aSShri Abhyankar PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 11119bd0847aSShri Abhyankar tmp[i] = sum; 11129371c9d4SSatish Balay v += nz; 11139371c9d4SSatish Balay vi += nz; 11149bd0847aSShri Abhyankar } 11159bd0847aSShri Abhyankar /* backward solve the upper triangular */ 11169bd0847aSShri Abhyankar for (i = n - 1; i >= 0; i--) { 11179bd0847aSShri Abhyankar v = aa + adiag[i + 1] + 1; 11189bd0847aSShri Abhyankar vi = aj + adiag[i + 1] + 1; 11199bd0847aSShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 11209bd0847aSShri Abhyankar sum = tmp[i]; 11219bd0847aSShri Abhyankar PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 11229bd0847aSShri Abhyankar x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 11239bd0847aSShri Abhyankar } 1124910cf402Sprj- b += ldb; 1125910cf402Sprj- x += ldx; 11269bd0847aSShri Abhyankar } 11279566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 11289566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 11299566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &b)); 11309566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &x)); 11319566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 11323ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 11339bd0847aSShri Abhyankar } 11349bd0847aSShri Abhyankar 1135ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx) 1136d71ae5a4SJacob Faibussowitsch { 1137137fb511SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1138137fb511SHong Zhang IS iscol = a->col, isrow = a->row; 11395d0c19d7SBarry Smith const PetscInt *r, *c, *rout, *cout; 11405d0c19d7SBarry Smith PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 11415d0c19d7SBarry Smith PetscInt nz, row; 1142fdc842d1SBarry Smith PetscScalar *x, *tmp, *tmps, sum; 1143fdc842d1SBarry Smith const PetscScalar *b; 1144dd6ea824SBarry Smith const MatScalar *aa = a->a, *v; 1145137fb511SHong Zhang 1146137fb511SHong Zhang PetscFunctionBegin; 11473ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1148137fb511SHong Zhang 11499566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 11509566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 1151137fb511SHong Zhang tmp = a->solve_work; 1152137fb511SHong Zhang 11539371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 11549371c9d4SSatish Balay r = rout; 11559371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 11569371c9d4SSatish Balay c = cout + (n - 1); 1157137fb511SHong Zhang 1158137fb511SHong Zhang /* forward solve the lower triangular */ 1159137fb511SHong Zhang tmp[0] = b[*r++]; 1160137fb511SHong Zhang tmps = tmp; 1161137fb511SHong Zhang for (row = 1; row < n; row++) { 1162137fb511SHong Zhang i = rout[row]; /* permuted row */ 1163137fb511SHong Zhang v = aa + ai[i]; 1164137fb511SHong Zhang vi = aj + ai[i]; 1165137fb511SHong Zhang nz = a->diag[i] - ai[i]; 1166137fb511SHong Zhang sum = b[*r++]; 1167003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1168137fb511SHong Zhang tmp[row] = sum; 1169137fb511SHong Zhang } 1170137fb511SHong Zhang 1171137fb511SHong Zhang /* backward solve the upper triangular */ 1172137fb511SHong Zhang for (row = n - 1; row >= 0; row--) { 1173137fb511SHong Zhang i = rout[row]; /* permuted row */ 1174137fb511SHong Zhang v = aa + a->diag[i] + 1; 1175137fb511SHong Zhang vi = aj + a->diag[i] + 1; 1176137fb511SHong Zhang nz = ai[i + 1] - a->diag[i] - 1; 1177137fb511SHong Zhang sum = tmp[row]; 1178003131ecSBarry Smith PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1179137fb511SHong Zhang x[*c--] = tmp[row] = sum * aa[a->diag[i]]; 1180137fb511SHong Zhang } 1181137fb511SHong Zhang 11829566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 11839566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 11849566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 11859566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 11869566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 11873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1188137fb511SHong Zhang } 1189137fb511SHong Zhang 1190c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1191ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) 1192d71ae5a4SJacob Faibussowitsch { 1193930ae53cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1194d0f46423SBarry Smith PetscInt n = A->rmap->n; 1195b377110cSBarry Smith const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag; 1196356650c2SBarry Smith PetscScalar *x; 1197356650c2SBarry Smith const PetscScalar *b; 1198dd6ea824SBarry Smith const MatScalar *aa = a->a; 1199aa482453SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1200356650c2SBarry Smith PetscInt adiag_i, i, nz, ai_i; 1201b377110cSBarry Smith const PetscInt *vi; 1202dd6ea824SBarry Smith const MatScalar *v; 1203dd6ea824SBarry Smith PetscScalar sum; 1204d85afc42SSatish Balay #endif 1205930ae53cSBarry Smith 12063a40ed3dSBarry Smith PetscFunctionBegin; 12073ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1208930ae53cSBarry Smith 12099566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 12109566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 1211930ae53cSBarry Smith 1212aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 12131c4feecaSSatish Balay fortransolveaij_(&n, x, ai, aj, adiag, aa, b); 12141c4feecaSSatish Balay #else 1215930ae53cSBarry Smith /* forward solve the lower triangular */ 1216930ae53cSBarry Smith x[0] = b[0]; 1217930ae53cSBarry Smith for (i = 1; i < n; i++) { 1218930ae53cSBarry Smith ai_i = ai[i]; 1219930ae53cSBarry Smith v = aa + ai_i; 1220930ae53cSBarry Smith vi = aj + ai_i; 1221930ae53cSBarry Smith nz = adiag[i] - ai_i; 1222930ae53cSBarry Smith sum = b[i]; 1223003131ecSBarry Smith PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1224930ae53cSBarry Smith x[i] = sum; 1225930ae53cSBarry Smith } 1226930ae53cSBarry Smith 1227930ae53cSBarry Smith /* backward solve the upper triangular */ 1228930ae53cSBarry Smith for (i = n - 1; i >= 0; i--) { 1229930ae53cSBarry Smith adiag_i = adiag[i]; 1230d708749eSBarry Smith v = aa + adiag_i + 1; 1231d708749eSBarry Smith vi = aj + adiag_i + 1; 1232930ae53cSBarry Smith nz = ai[i + 1] - adiag_i - 1; 1233930ae53cSBarry Smith sum = x[i]; 1234003131ecSBarry Smith PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1235930ae53cSBarry Smith x[i] = sum * aa[adiag_i]; 1236930ae53cSBarry Smith } 12371c4feecaSSatish Balay #endif 12389566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 12399566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 12409566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 12413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1242930ae53cSBarry Smith } 1243930ae53cSBarry Smith 1244ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx) 1245d71ae5a4SJacob Faibussowitsch { 1246416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1247416022c9SBarry Smith IS iscol = a->col, isrow = a->row; 124804fbf559SBarry Smith PetscInt i, n = A->rmap->n, j; 12495d0c19d7SBarry Smith PetscInt nz; 125004fbf559SBarry Smith const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j; 125104fbf559SBarry Smith PetscScalar *x, *tmp, sum; 125204fbf559SBarry Smith const PetscScalar *b; 1253dd6ea824SBarry Smith const MatScalar *aa = a->a, *v; 1254da3a660dSBarry Smith 12553a40ed3dSBarry Smith PetscFunctionBegin; 12569566063dSJacob Faibussowitsch if (yy != xx) PetscCall(VecCopy(yy, xx)); 1257da3a660dSBarry Smith 12589566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 12599566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 1260416022c9SBarry Smith tmp = a->solve_work; 1261da3a660dSBarry Smith 12629371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 12639371c9d4SSatish Balay r = rout; 12649371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 12659371c9d4SSatish Balay c = cout + (n - 1); 1266da3a660dSBarry Smith 1267da3a660dSBarry Smith /* forward solve the lower triangular */ 1268da3a660dSBarry Smith tmp[0] = b[*r++]; 1269da3a660dSBarry Smith for (i = 1; i < n; i++) { 1270010a20caSHong Zhang v = aa + ai[i]; 1271010a20caSHong Zhang vi = aj + ai[i]; 1272416022c9SBarry Smith nz = a->diag[i] - ai[i]; 1273da3a660dSBarry Smith sum = b[*r++]; 127404fbf559SBarry Smith for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1275da3a660dSBarry Smith tmp[i] = sum; 1276da3a660dSBarry Smith } 1277da3a660dSBarry Smith 1278da3a660dSBarry Smith /* backward solve the upper triangular */ 1279da3a660dSBarry Smith for (i = n - 1; i >= 0; i--) { 1280010a20caSHong Zhang v = aa + a->diag[i] + 1; 1281010a20caSHong Zhang vi = aj + a->diag[i] + 1; 1282416022c9SBarry Smith nz = ai[i + 1] - a->diag[i] - 1; 1283da3a660dSBarry Smith sum = tmp[i]; 128404fbf559SBarry Smith for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1285010a20caSHong Zhang tmp[i] = sum * aa[a->diag[i]]; 1286da3a660dSBarry Smith x[*c--] += tmp[i]; 1287da3a660dSBarry Smith } 1288da3a660dSBarry Smith 12899566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 12909566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 12919566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 12929566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 12939566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz)); 12943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1295da3a660dSBarry Smith } 129604fbf559SBarry Smith 1297d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx) 1298d71ae5a4SJacob Faibussowitsch { 12993c0119dfSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 13003c0119dfSShri Abhyankar IS iscol = a->col, isrow = a->row; 13013c0119dfSShri Abhyankar PetscInt i, n = A->rmap->n, j; 13023c0119dfSShri Abhyankar PetscInt nz; 13033c0119dfSShri Abhyankar const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 13043c0119dfSShri Abhyankar PetscScalar *x, *tmp, sum; 13053c0119dfSShri Abhyankar const PetscScalar *b; 13063c0119dfSShri Abhyankar const MatScalar *aa = a->a, *v; 13073c0119dfSShri Abhyankar 13083c0119dfSShri Abhyankar PetscFunctionBegin; 13099566063dSJacob Faibussowitsch if (yy != xx) PetscCall(VecCopy(yy, xx)); 13103c0119dfSShri Abhyankar 13119566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 13129566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 13133c0119dfSShri Abhyankar tmp = a->solve_work; 13143c0119dfSShri Abhyankar 13159371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 13169371c9d4SSatish Balay r = rout; 13179371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 13189371c9d4SSatish Balay c = cout; 13193c0119dfSShri Abhyankar 13203c0119dfSShri Abhyankar /* forward solve the lower triangular */ 13213c0119dfSShri Abhyankar tmp[0] = b[r[0]]; 13223c0119dfSShri Abhyankar v = aa; 13233c0119dfSShri Abhyankar vi = aj; 13243c0119dfSShri Abhyankar for (i = 1; i < n; i++) { 13253c0119dfSShri Abhyankar nz = ai[i + 1] - ai[i]; 13263c0119dfSShri Abhyankar sum = b[r[i]]; 13273c0119dfSShri Abhyankar for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 13283c0119dfSShri Abhyankar tmp[i] = sum; 13292205254eSKarl Rupp v += nz; 13302205254eSKarl Rupp vi += nz; 13313c0119dfSShri Abhyankar } 13323c0119dfSShri Abhyankar 13333c0119dfSShri Abhyankar /* backward solve the upper triangular */ 13343c0119dfSShri Abhyankar v = aa + adiag[n - 1]; 13353c0119dfSShri Abhyankar vi = aj + adiag[n - 1]; 13363c0119dfSShri Abhyankar for (i = n - 1; i >= 0; i--) { 13373c0119dfSShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 13383c0119dfSShri Abhyankar sum = tmp[i]; 13393c0119dfSShri Abhyankar for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 13403c0119dfSShri Abhyankar tmp[i] = sum * v[nz]; 13413c0119dfSShri Abhyankar x[c[i]] += tmp[i]; 13429371c9d4SSatish Balay v += nz + 1; 13439371c9d4SSatish Balay vi += nz + 1; 13443c0119dfSShri Abhyankar } 13453c0119dfSShri Abhyankar 13469566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 13479566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 13489566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 13499566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 13509566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz)); 13513ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 13523c0119dfSShri Abhyankar } 13533c0119dfSShri Abhyankar 1354d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 1355d71ae5a4SJacob Faibussowitsch { 1356416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 13572235e667SBarry Smith IS iscol = a->col, isrow = a->row; 135804fbf559SBarry Smith const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 135904fbf559SBarry Smith PetscInt i, n = A->rmap->n, j; 136004fbf559SBarry Smith PetscInt nz; 136104fbf559SBarry Smith PetscScalar *x, *tmp, s1; 1362dd6ea824SBarry Smith const MatScalar *aa = a->a, *v; 136304fbf559SBarry Smith const PetscScalar *b; 1364da3a660dSBarry Smith 13653a40ed3dSBarry Smith PetscFunctionBegin; 13669566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 13679566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 1368416022c9SBarry Smith tmp = a->solve_work; 1369da3a660dSBarry Smith 13709371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 13719371c9d4SSatish Balay r = rout; 13729371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 13739371c9d4SSatish Balay c = cout; 1374da3a660dSBarry Smith 1375da3a660dSBarry Smith /* copy the b into temp work space according to permutation */ 13762235e667SBarry Smith for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1377da3a660dSBarry Smith 1378da3a660dSBarry Smith /* forward solve the U^T */ 1379da3a660dSBarry Smith for (i = 0; i < n; i++) { 1380010a20caSHong Zhang v = aa + diag[i]; 1381010a20caSHong Zhang vi = aj + diag[i] + 1; 1382f1af5d2fSBarry Smith nz = ai[i + 1] - diag[i] - 1; 1383c38d4ed2SBarry Smith s1 = tmp[i]; 1384ef66eb69SBarry Smith s1 *= (*v++); /* multiply by inverse of diagonal entry */ 138504fbf559SBarry Smith for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1386c38d4ed2SBarry Smith tmp[i] = s1; 1387da3a660dSBarry Smith } 1388da3a660dSBarry Smith 1389da3a660dSBarry Smith /* backward solve the L^T */ 1390da3a660dSBarry Smith for (i = n - 1; i >= 0; i--) { 1391010a20caSHong Zhang v = aa + diag[i] - 1; 1392010a20caSHong Zhang vi = aj + diag[i] - 1; 1393f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 1394f1af5d2fSBarry Smith s1 = tmp[i]; 139504fbf559SBarry Smith for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1396da3a660dSBarry Smith } 1397da3a660dSBarry Smith 1398da3a660dSBarry Smith /* copy tmp into x according to permutation */ 1399591294e4SBarry Smith for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1400da3a660dSBarry Smith 14019566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 14029566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 14039566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 14049566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 1405da3a660dSBarry Smith 14069566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 14073ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1408da3a660dSBarry Smith } 1409da3a660dSBarry Smith 1410d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx) 1411d71ae5a4SJacob Faibussowitsch { 1412d1fa9404SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1413d1fa9404SShri Abhyankar IS iscol = a->col, isrow = a->row; 1414d1fa9404SShri Abhyankar const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1415d1fa9404SShri Abhyankar PetscInt i, n = A->rmap->n, j; 1416d1fa9404SShri Abhyankar PetscInt nz; 1417d1fa9404SShri Abhyankar PetscScalar *x, *tmp, s1; 1418d1fa9404SShri Abhyankar const MatScalar *aa = a->a, *v; 1419d1fa9404SShri Abhyankar const PetscScalar *b; 1420d1fa9404SShri Abhyankar 1421d1fa9404SShri Abhyankar PetscFunctionBegin; 14229566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 14239566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 1424d1fa9404SShri Abhyankar tmp = a->solve_work; 1425d1fa9404SShri Abhyankar 14269371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 14279371c9d4SSatish Balay r = rout; 14289371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 14299371c9d4SSatish Balay c = cout; 1430d1fa9404SShri Abhyankar 1431d1fa9404SShri Abhyankar /* copy the b into temp work space according to permutation */ 1432d1fa9404SShri Abhyankar for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1433d1fa9404SShri Abhyankar 1434d1fa9404SShri Abhyankar /* forward solve the U^T */ 1435d1fa9404SShri Abhyankar for (i = 0; i < n; i++) { 1436d1fa9404SShri Abhyankar v = aa + adiag[i + 1] + 1; 1437d1fa9404SShri Abhyankar vi = aj + adiag[i + 1] + 1; 1438d1fa9404SShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 1439d1fa9404SShri Abhyankar s1 = tmp[i]; 1440d1fa9404SShri Abhyankar s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1441d1fa9404SShri Abhyankar for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1442d1fa9404SShri Abhyankar tmp[i] = s1; 1443d1fa9404SShri Abhyankar } 1444d1fa9404SShri Abhyankar 1445d1fa9404SShri Abhyankar /* backward solve the L^T */ 1446d1fa9404SShri Abhyankar for (i = n - 1; i >= 0; i--) { 1447d1fa9404SShri Abhyankar v = aa + ai[i]; 1448d1fa9404SShri Abhyankar vi = aj + ai[i]; 1449d1fa9404SShri Abhyankar nz = ai[i + 1] - ai[i]; 1450d1fa9404SShri Abhyankar s1 = tmp[i]; 1451d1fa9404SShri Abhyankar for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1452d1fa9404SShri Abhyankar } 1453d1fa9404SShri Abhyankar 1454d1fa9404SShri Abhyankar /* copy tmp into x according to permutation */ 1455d1fa9404SShri Abhyankar for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1456d1fa9404SShri Abhyankar 14579566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 14589566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 14599566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 14609566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 1461d1fa9404SShri Abhyankar 14629566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 14633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1464d1fa9404SShri Abhyankar } 1465d1fa9404SShri Abhyankar 1466d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx) 1467d71ae5a4SJacob Faibussowitsch { 1468416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 14692235e667SBarry Smith IS iscol = a->col, isrow = a->row; 147004fbf559SBarry Smith const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 147104fbf559SBarry Smith PetscInt i, n = A->rmap->n, j; 147204fbf559SBarry Smith PetscInt nz; 147304fbf559SBarry Smith PetscScalar *x, *tmp, s1; 1474dd6ea824SBarry Smith const MatScalar *aa = a->a, *v; 147504fbf559SBarry Smith const PetscScalar *b; 14766abc6512SBarry Smith 14773a40ed3dSBarry Smith PetscFunctionBegin; 14789566063dSJacob Faibussowitsch if (zz != xx) PetscCall(VecCopy(zz, xx)); 14799566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 14809566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 1481416022c9SBarry Smith tmp = a->solve_work; 14826abc6512SBarry Smith 14839371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 14849371c9d4SSatish Balay r = rout; 14859371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 14869371c9d4SSatish Balay c = cout; 14876abc6512SBarry Smith 14886abc6512SBarry Smith /* copy the b into temp work space according to permutation */ 14892235e667SBarry Smith for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 14906abc6512SBarry Smith 14916abc6512SBarry Smith /* forward solve the U^T */ 14926abc6512SBarry Smith for (i = 0; i < n; i++) { 1493010a20caSHong Zhang v = aa + diag[i]; 1494010a20caSHong Zhang vi = aj + diag[i] + 1; 1495f1af5d2fSBarry Smith nz = ai[i + 1] - diag[i] - 1; 149604fbf559SBarry Smith s1 = tmp[i]; 149704fbf559SBarry Smith s1 *= (*v++); /* multiply by inverse of diagonal entry */ 149804fbf559SBarry Smith for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 149904fbf559SBarry Smith tmp[i] = s1; 15006abc6512SBarry Smith } 15016abc6512SBarry Smith 15026abc6512SBarry Smith /* backward solve the L^T */ 15036abc6512SBarry Smith for (i = n - 1; i >= 0; i--) { 1504010a20caSHong Zhang v = aa + diag[i] - 1; 1505010a20caSHong Zhang vi = aj + diag[i] - 1; 1506f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 150704fbf559SBarry Smith s1 = tmp[i]; 150804fbf559SBarry Smith for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 15096abc6512SBarry Smith } 15106abc6512SBarry Smith 15116abc6512SBarry Smith /* copy tmp into x according to permutation */ 15126abc6512SBarry Smith for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 15136abc6512SBarry Smith 15149566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 15159566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 15169566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 15179566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 15186abc6512SBarry Smith 15199566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 15203ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1521da3a660dSBarry Smith } 152204fbf559SBarry Smith 1523d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx) 1524d71ae5a4SJacob Faibussowitsch { 1525533e6011SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1526533e6011SShri Abhyankar IS iscol = a->col, isrow = a->row; 1527533e6011SShri Abhyankar const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1528533e6011SShri Abhyankar PetscInt i, n = A->rmap->n, j; 1529533e6011SShri Abhyankar PetscInt nz; 1530533e6011SShri Abhyankar PetscScalar *x, *tmp, s1; 1531533e6011SShri Abhyankar const MatScalar *aa = a->a, *v; 1532533e6011SShri Abhyankar const PetscScalar *b; 1533533e6011SShri Abhyankar 1534533e6011SShri Abhyankar PetscFunctionBegin; 15359566063dSJacob Faibussowitsch if (zz != xx) PetscCall(VecCopy(zz, xx)); 15369566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 15379566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x)); 1538533e6011SShri Abhyankar tmp = a->solve_work; 1539533e6011SShri Abhyankar 15409371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 15419371c9d4SSatish Balay r = rout; 15429371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 15439371c9d4SSatish Balay c = cout; 1544533e6011SShri Abhyankar 1545533e6011SShri Abhyankar /* copy the b into temp work space according to permutation */ 1546533e6011SShri Abhyankar for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1547533e6011SShri Abhyankar 1548533e6011SShri Abhyankar /* forward solve the U^T */ 1549533e6011SShri Abhyankar for (i = 0; i < n; i++) { 1550533e6011SShri Abhyankar v = aa + adiag[i + 1] + 1; 1551533e6011SShri Abhyankar vi = aj + adiag[i + 1] + 1; 1552533e6011SShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 1553533e6011SShri Abhyankar s1 = tmp[i]; 1554533e6011SShri Abhyankar s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1555533e6011SShri Abhyankar for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1556533e6011SShri Abhyankar tmp[i] = s1; 1557533e6011SShri Abhyankar } 1558533e6011SShri Abhyankar 1559533e6011SShri Abhyankar /* backward solve the L^T */ 1560533e6011SShri Abhyankar for (i = n - 1; i >= 0; i--) { 1561533e6011SShri Abhyankar v = aa + ai[i]; 1562533e6011SShri Abhyankar vi = aj + ai[i]; 1563533e6011SShri Abhyankar nz = ai[i + 1] - ai[i]; 1564533e6011SShri Abhyankar s1 = tmp[i]; 1565c5e3b2a3SShri Abhyankar for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1566533e6011SShri Abhyankar } 1567533e6011SShri Abhyankar 1568533e6011SShri Abhyankar /* copy tmp into x according to permutation */ 1569533e6011SShri Abhyankar for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1570533e6011SShri Abhyankar 15719566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 15729566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 15739566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 15749566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x)); 1575533e6011SShri Abhyankar 15769566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 15773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1578533e6011SShri Abhyankar } 1579533e6011SShri Abhyankar 15808fc3a347SHong Zhang /* 15818a76ed4fSHong Zhang ilu() under revised new data structure. 1582813a1f2bSShri Abhyankar Factored arrays bj and ba are stored as 1583813a1f2bSShri Abhyankar L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1584813a1f2bSShri Abhyankar 1585813a1f2bSShri Abhyankar bi=fact->i is an array of size n+1, in which 1586baabb860SHong Zhang bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1587baabb860SHong Zhang bi[n]: points to L(n-1,n-1)+1 1588baabb860SHong Zhang 1589813a1f2bSShri Abhyankar bdiag=fact->diag is an array of size n+1,in which 1590baabb860SHong Zhang bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1591a24f213cSHong Zhang bdiag[n]: points to entry of U(n-1,0)-1 1592813a1f2bSShri Abhyankar 1593813a1f2bSShri Abhyankar U(i,:) contains bdiag[i] as its last entry, i.e., 1594813a1f2bSShri Abhyankar U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1595813a1f2bSShri Abhyankar */ 1596d71ae5a4SJacob Faibussowitsch PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1597d71ae5a4SJacob Faibussowitsch { 1598813a1f2bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1599bbd65245SShri Abhyankar const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag; 1600bbd65245SShri Abhyankar PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag; 16011df811f5SHong Zhang IS isicol; 1602813a1f2bSShri Abhyankar 1603813a1f2bSShri Abhyankar PetscFunctionBegin; 16049566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 16059566063dSJacob Faibussowitsch PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE)); 1606813a1f2bSShri Abhyankar b = (Mat_SeqAIJ *)(fact)->data; 1607813a1f2bSShri Abhyankar 1608813a1f2bSShri Abhyankar /* allocate matrix arrays for new data structure */ 16099566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i)); 16102205254eSKarl Rupp 1611813a1f2bSShri Abhyankar b->singlemalloc = PETSC_TRUE; 1612aa624791SPierre Jolivet if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag)); 1613813a1f2bSShri Abhyankar bdiag = b->diag; 1614813a1f2bSShri Abhyankar 161548a46eb9SPierre Jolivet if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n])); 1616813a1f2bSShri Abhyankar 1617813a1f2bSShri Abhyankar /* set bi and bj with new data structure */ 1618813a1f2bSShri Abhyankar bi = b->i; 1619813a1f2bSShri Abhyankar bj = b->j; 1620813a1f2bSShri Abhyankar 1621813a1f2bSShri Abhyankar /* L part */ 1622e60cf9a0SBarry Smith bi[0] = 0; 1623813a1f2bSShri Abhyankar for (i = 0; i < n; i++) { 1624813a1f2bSShri Abhyankar nz = adiag[i] - ai[i]; 1625813a1f2bSShri Abhyankar bi[i + 1] = bi[i] + nz; 1626813a1f2bSShri Abhyankar aj = a->j + ai[i]; 1627ad540459SPierre Jolivet for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1628813a1f2bSShri Abhyankar } 1629813a1f2bSShri Abhyankar 1630813a1f2bSShri Abhyankar /* U part */ 163162fbd6afSShri Abhyankar bdiag[n] = bi[n] - 1; 1632813a1f2bSShri Abhyankar for (i = n - 1; i >= 0; i--) { 1633813a1f2bSShri Abhyankar nz = ai[i + 1] - adiag[i] - 1; 1634813a1f2bSShri Abhyankar aj = a->j + adiag[i] + 1; 1635ad540459SPierre Jolivet for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1636813a1f2bSShri Abhyankar /* diag[i] */ 1637bbd65245SShri Abhyankar bj[k++] = i; 1638a24f213cSHong Zhang bdiag[i] = bdiag[i + 1] + nz + 1; 1639813a1f2bSShri Abhyankar } 16401df811f5SHong Zhang 1641d5f3da31SBarry Smith fact->factortype = MAT_FACTOR_ILU; 16421df811f5SHong Zhang fact->info.factor_mallocs = 0; 16431df811f5SHong Zhang fact->info.fill_ratio_given = info->fill; 16441df811f5SHong Zhang fact->info.fill_ratio_needed = 1.0; 164535233d99SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 16469566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 16471df811f5SHong Zhang 16481df811f5SHong Zhang b = (Mat_SeqAIJ *)(fact)->data; 16491df811f5SHong Zhang b->row = isrow; 16501df811f5SHong Zhang b->col = iscol; 16511df811f5SHong Zhang b->icol = isicol; 16529566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work)); 16539566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 16549566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 16553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1656813a1f2bSShri Abhyankar } 1657813a1f2bSShri Abhyankar 1658d71ae5a4SJacob Faibussowitsch PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1659d71ae5a4SJacob Faibussowitsch { 1660813a1f2bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1661813a1f2bSShri Abhyankar IS isicol; 1662813a1f2bSShri Abhyankar const PetscInt *r, *ic; 16631df811f5SHong Zhang PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1664813a1f2bSShri Abhyankar PetscInt *bi, *cols, nnz, *cols_lvl; 1665813a1f2bSShri Abhyankar PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1666813a1f2bSShri Abhyankar PetscInt i, levels, diagonal_fill; 1667035f4963SHong Zhang PetscBool col_identity, row_identity, missing; 1668813a1f2bSShri Abhyankar PetscReal f; 16690298fd71SBarry Smith PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1670813a1f2bSShri Abhyankar PetscBT lnkbt; 1671813a1f2bSShri Abhyankar PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 16720298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 16730298fd71SBarry Smith PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1674813a1f2bSShri Abhyankar 1675813a1f2bSShri Abhyankar PetscFunctionBegin; 167608401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 16779566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 167828b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1679ad04f41aSHong Zhang 1680813a1f2bSShri Abhyankar levels = (PetscInt)info->levels; 16819566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 16829566063dSJacob Faibussowitsch PetscCall(ISIdentity(iscol, &col_identity)); 1683813a1f2bSShri Abhyankar if (!levels && row_identity && col_identity) { 1684813a1f2bSShri Abhyankar /* special case: ilu(0) with natural ordering */ 16859566063dSJacob Faibussowitsch PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info)); 1686ad540459SPierre Jolivet if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 16873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1688813a1f2bSShri Abhyankar } 1689813a1f2bSShri Abhyankar 16909566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 16919566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 16929566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 1693813a1f2bSShri Abhyankar 16941bfa06eaSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 16959566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bi)); 16969566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 1697e60cf9a0SBarry Smith bi[0] = bdiag[0] = 0; 16989566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1699813a1f2bSShri Abhyankar 1700813a1f2bSShri Abhyankar /* create a linked list for storing column indices of the active row */ 1701813a1f2bSShri Abhyankar nlnk = n + 1; 17029566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1703813a1f2bSShri Abhyankar 1704813a1f2bSShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 17051df811f5SHong Zhang f = info->fill; 17061df811f5SHong Zhang diagonal_fill = (PetscInt)info->diagonal_fill; 17079566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1708813a1f2bSShri Abhyankar current_space = free_space; 17099566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1710813a1f2bSShri Abhyankar current_space_lvl = free_space_lvl; 1711813a1f2bSShri Abhyankar for (i = 0; i < n; i++) { 1712813a1f2bSShri Abhyankar nzi = 0; 1713813a1f2bSShri Abhyankar /* copy current row into linked list */ 1714813a1f2bSShri Abhyankar nnz = ai[r[i] + 1] - ai[r[i]]; 171528b400f6SJacob Faibussowitsch PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1716813a1f2bSShri Abhyankar cols = aj + ai[r[i]]; 1717813a1f2bSShri Abhyankar lnk[i] = -1; /* marker to indicate if diagonal exists */ 17189566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1719813a1f2bSShri Abhyankar nzi += nlnk; 1720813a1f2bSShri Abhyankar 1721813a1f2bSShri Abhyankar /* make sure diagonal entry is included */ 1722813a1f2bSShri Abhyankar if (diagonal_fill && lnk[i] == -1) { 1723813a1f2bSShri Abhyankar fm = n; 1724813a1f2bSShri Abhyankar while (lnk[fm] < i) fm = lnk[fm]; 1725813a1f2bSShri Abhyankar lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1726813a1f2bSShri Abhyankar lnk[fm] = i; 1727813a1f2bSShri Abhyankar lnk_lvl[i] = 0; 17289371c9d4SSatish Balay nzi++; 17299371c9d4SSatish Balay dcount++; 1730813a1f2bSShri Abhyankar } 1731813a1f2bSShri Abhyankar 1732813a1f2bSShri Abhyankar /* add pivot rows into the active row */ 1733813a1f2bSShri Abhyankar nzbd = 0; 1734813a1f2bSShri Abhyankar prow = lnk[n]; 1735813a1f2bSShri Abhyankar while (prow < i) { 1736813a1f2bSShri Abhyankar nnz = bdiag[prow]; 1737813a1f2bSShri Abhyankar cols = bj_ptr[prow] + nnz + 1; 1738813a1f2bSShri Abhyankar cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1739813a1f2bSShri Abhyankar nnz = bi[prow + 1] - bi[prow] - nnz - 1; 17409566063dSJacob Faibussowitsch PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1741813a1f2bSShri Abhyankar nzi += nlnk; 1742813a1f2bSShri Abhyankar prow = lnk[prow]; 1743813a1f2bSShri Abhyankar nzbd++; 1744813a1f2bSShri Abhyankar } 1745813a1f2bSShri Abhyankar bdiag[i] = nzbd; 1746813a1f2bSShri Abhyankar bi[i + 1] = bi[i] + nzi; 1747813a1f2bSShri Abhyankar /* if free space is not available, make more free space */ 1748813a1f2bSShri Abhyankar if (current_space->local_remaining < nzi) { 1749f91af8c7SBarry Smith nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */ 17509566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 17519566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1752813a1f2bSShri Abhyankar reallocs++; 1753813a1f2bSShri Abhyankar } 1754813a1f2bSShri Abhyankar 1755813a1f2bSShri Abhyankar /* copy data into free_space and free_space_lvl, then initialize lnk */ 17569566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 1757813a1f2bSShri Abhyankar bj_ptr[i] = current_space->array; 1758813a1f2bSShri Abhyankar bjlvl_ptr[i] = current_space_lvl->array; 1759813a1f2bSShri Abhyankar 1760813a1f2bSShri Abhyankar /* make sure the active row i has diagonal entry */ 176108401ef6SPierre Jolivet PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 1762813a1f2bSShri Abhyankar 1763813a1f2bSShri Abhyankar current_space->array += nzi; 1764813a1f2bSShri Abhyankar current_space->local_used += nzi; 1765813a1f2bSShri Abhyankar current_space->local_remaining -= nzi; 1766813a1f2bSShri Abhyankar current_space_lvl->array += nzi; 1767813a1f2bSShri Abhyankar current_space_lvl->local_used += nzi; 1768813a1f2bSShri Abhyankar current_space_lvl->local_remaining -= nzi; 1769813a1f2bSShri Abhyankar } 1770813a1f2bSShri Abhyankar 17719566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 17729566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 1773813a1f2bSShri Abhyankar /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 17749566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 17759566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 1776813a1f2bSShri Abhyankar 17779566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 17789566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 17799566063dSJacob Faibussowitsch PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 1780813a1f2bSShri Abhyankar 1781813a1f2bSShri Abhyankar #if defined(PETSC_USE_INFO) 1782813a1f2bSShri Abhyankar { 1783aef85c9fSShri Abhyankar PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 17849566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 17859566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 17869566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 17879566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 178848a46eb9SPierre Jolivet if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 1789813a1f2bSShri Abhyankar } 1790813a1f2bSShri Abhyankar #endif 1791813a1f2bSShri Abhyankar /* put together the new matrix */ 17929566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 1793813a1f2bSShri Abhyankar b = (Mat_SeqAIJ *)(fact)->data; 17942205254eSKarl Rupp 1795813a1f2bSShri Abhyankar b->free_a = PETSC_TRUE; 1796813a1f2bSShri Abhyankar b->free_ij = PETSC_TRUE; 1797813a1f2bSShri Abhyankar b->singlemalloc = PETSC_FALSE; 17982205254eSKarl Rupp 17999566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a)); 18002205254eSKarl Rupp 1801813a1f2bSShri Abhyankar b->j = bj; 1802813a1f2bSShri Abhyankar b->i = bi; 1803813a1f2bSShri Abhyankar b->diag = bdiag; 1804f4259b30SLisandro Dalcin b->ilen = NULL; 1805f4259b30SLisandro Dalcin b->imax = NULL; 1806813a1f2bSShri Abhyankar b->row = isrow; 1807813a1f2bSShri Abhyankar b->col = iscol; 18089566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 18099566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 1810813a1f2bSShri Abhyankar b->icol = isicol; 18112205254eSKarl Rupp 18129566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 1813813a1f2bSShri Abhyankar /* In b structure: Free imax, ilen, old a, old j. 1814813a1f2bSShri Abhyankar Allocate bdiag, solve_work, new a, new j */ 1815f268cba8SShri Abhyankar b->maxnz = b->nz = bdiag[0] + 1; 18162205254eSKarl Rupp 1817813a1f2bSShri Abhyankar (fact)->info.factor_mallocs = reallocs; 1818813a1f2bSShri Abhyankar (fact)->info.fill_ratio_given = f; 1819f268cba8SShri Abhyankar (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 182035233d99SShri Abhyankar (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1821ad540459SPierre Jolivet if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 18229566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 18233ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1824813a1f2bSShri Abhyankar } 1825813a1f2bSShri Abhyankar 1826ff6a9541SJacob Faibussowitsch #if 0 1827ff6a9541SJacob Faibussowitsch // unused 1828ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1829d71ae5a4SJacob Faibussowitsch { 18306516239fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 18316516239fSHong Zhang IS isicol; 18326516239fSHong Zhang const PetscInt *r, *ic; 1833ece542f9SHong Zhang PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 18346516239fSHong Zhang PetscInt *bi, *cols, nnz, *cols_lvl; 18356516239fSHong Zhang PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 18366516239fSHong Zhang PetscInt i, levels, diagonal_fill; 1837ace3abfcSBarry Smith PetscBool col_identity, row_identity; 18386516239fSHong Zhang PetscReal f; 18390298fd71SBarry Smith PetscInt nlnk, *lnk, *lnk_lvl = NULL; 18406516239fSHong Zhang PetscBT lnkbt; 18416516239fSHong Zhang PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 18420298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 18430298fd71SBarry Smith PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1844ace3abfcSBarry Smith PetscBool missing; 18456516239fSHong Zhang 18466516239fSHong Zhang PetscFunctionBegin; 184708401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 18489566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 184928b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1850ece542f9SHong Zhang 18516516239fSHong Zhang f = info->fill; 18526516239fSHong Zhang levels = (PetscInt)info->levels; 18536516239fSHong Zhang diagonal_fill = (PetscInt)info->diagonal_fill; 18542205254eSKarl Rupp 18559566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 18566516239fSHong Zhang 18579566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 18589566063dSJacob Faibussowitsch PetscCall(ISIdentity(iscol, &col_identity)); 18596516239fSHong Zhang if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 18609566063dSJacob Faibussowitsch PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE)); 18612205254eSKarl Rupp 1862ad04f41aSHong Zhang (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1863ad540459SPierre Jolivet if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1864d5f3da31SBarry Smith fact->factortype = MAT_FACTOR_ILU; 1865719d5645SBarry Smith (fact)->info.factor_mallocs = 0; 1866719d5645SBarry Smith (fact)->info.fill_ratio_given = info->fill; 1867719d5645SBarry Smith (fact)->info.fill_ratio_needed = 1.0; 18682205254eSKarl Rupp 1869719d5645SBarry Smith b = (Mat_SeqAIJ *)(fact)->data; 187008480c60SBarry Smith b->row = isrow; 187108480c60SBarry Smith b->col = iscol; 187282bf6240SBarry Smith b->icol = isicol; 18739566063dSJacob Faibussowitsch PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work)); 18749566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 18759566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 18763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 187708480c60SBarry Smith } 187808480c60SBarry Smith 18799566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 18809566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 18819e25ed09SBarry Smith 18821bfa06eaSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 18839566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bi)); 18849566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 1885a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 18866cf997b0SBarry Smith 18879566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 18885a8e39fbSHong Zhang 18895a8e39fbSHong Zhang /* create a linked list for storing column indices of the active row */ 18905a8e39fbSHong Zhang nlnk = n + 1; 18919566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 18925a8e39fbSHong Zhang 18935a8e39fbSHong Zhang /* initial FreeSpace size is f*(ai[n]+1) */ 18949566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 18955a8e39fbSHong Zhang current_space = free_space; 18969566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 18975a8e39fbSHong Zhang current_space_lvl = free_space_lvl; 18985a8e39fbSHong Zhang 18995a8e39fbSHong Zhang for (i = 0; i < n; i++) { 19005a8e39fbSHong Zhang nzi = 0; 19016cf997b0SBarry Smith /* copy current row into linked list */ 19025a8e39fbSHong Zhang nnz = ai[r[i] + 1] - ai[r[i]]; 190328b400f6SJacob Faibussowitsch PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 19045a8e39fbSHong Zhang cols = aj + ai[r[i]]; 19055a8e39fbSHong Zhang lnk[i] = -1; /* marker to indicate if diagonal exists */ 19069566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 19075a8e39fbSHong Zhang nzi += nlnk; 19086cf997b0SBarry Smith 19096cf997b0SBarry Smith /* make sure diagonal entry is included */ 19105a8e39fbSHong Zhang if (diagonal_fill && lnk[i] == -1) { 19116cf997b0SBarry Smith fm = n; 19125a8e39fbSHong Zhang while (lnk[fm] < i) fm = lnk[fm]; 19135a8e39fbSHong Zhang lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 19145a8e39fbSHong Zhang lnk[fm] = i; 19155a8e39fbSHong Zhang lnk_lvl[i] = 0; 19169371c9d4SSatish Balay nzi++; 19179371c9d4SSatish Balay dcount++; 19186cf997b0SBarry Smith } 19196cf997b0SBarry Smith 19205a8e39fbSHong Zhang /* add pivot rows into the active row */ 19215a8e39fbSHong Zhang nzbd = 0; 19225a8e39fbSHong Zhang prow = lnk[n]; 19235a8e39fbSHong Zhang while (prow < i) { 19245a8e39fbSHong Zhang nnz = bdiag[prow]; 19255a8e39fbSHong Zhang cols = bj_ptr[prow] + nnz + 1; 19265a8e39fbSHong Zhang cols_lvl = bjlvl_ptr[prow] + nnz + 1; 19275a8e39fbSHong Zhang nnz = bi[prow + 1] - bi[prow] - nnz - 1; 19289566063dSJacob Faibussowitsch PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 19295a8e39fbSHong Zhang nzi += nlnk; 19305a8e39fbSHong Zhang prow = lnk[prow]; 19315a8e39fbSHong Zhang nzbd++; 193256beaf04SBarry Smith } 19335a8e39fbSHong Zhang bdiag[i] = nzbd; 19345a8e39fbSHong Zhang bi[i + 1] = bi[i] + nzi; 1935ecf371e4SBarry Smith 19365a8e39fbSHong Zhang /* if free space is not available, make more free space */ 19375a8e39fbSHong Zhang if (current_space->local_remaining < nzi) { 1938f91af8c7SBarry Smith nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */ 19399566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 19409566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 19415a8e39fbSHong Zhang reallocs++; 19425a8e39fbSHong Zhang } 1943ecf371e4SBarry Smith 19445a8e39fbSHong Zhang /* copy data into free_space and free_space_lvl, then initialize lnk */ 19459566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 19465a8e39fbSHong Zhang bj_ptr[i] = current_space->array; 19475a8e39fbSHong Zhang bjlvl_ptr[i] = current_space_lvl->array; 19485a8e39fbSHong Zhang 19495a8e39fbSHong Zhang /* make sure the active row i has diagonal entry */ 195008401ef6SPierre Jolivet PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 19515a8e39fbSHong Zhang 19525a8e39fbSHong Zhang current_space->array += nzi; 19535a8e39fbSHong Zhang current_space->local_used += nzi; 19545a8e39fbSHong Zhang current_space->local_remaining -= nzi; 19555a8e39fbSHong Zhang current_space_lvl->array += nzi; 19565a8e39fbSHong Zhang current_space_lvl->local_used += nzi; 19575a8e39fbSHong Zhang current_space_lvl->local_remaining -= nzi; 19589e25ed09SBarry Smith } 19595a8e39fbSHong Zhang 19609566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 19619566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 19625a8e39fbSHong Zhang 19635a8e39fbSHong Zhang /* destroy list of free space and other temporary arrays */ 19649566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 19659566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */ 19669566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 19679566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 19689566063dSJacob Faibussowitsch PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 19699e25ed09SBarry Smith 19706cf91177SBarry Smith #if defined(PETSC_USE_INFO) 1971f64065bbSBarry Smith { 19725a8e39fbSHong Zhang PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 19739566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 19749566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 19759566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 19769566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 197748a46eb9SPierre Jolivet if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 1978f64065bbSBarry Smith } 197963ba0a88SBarry Smith #endif 1980bbb6d6a8SBarry Smith 19819e25ed09SBarry Smith /* put together the new matrix */ 19829566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 1983719d5645SBarry Smith b = (Mat_SeqAIJ *)(fact)->data; 19842205254eSKarl Rupp 1985e6b907acSBarry Smith b->free_a = PETSC_TRUE; 1986e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 19877c922b88SBarry Smith b->singlemalloc = PETSC_FALSE; 19882205254eSKarl Rupp 19899566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[n], &b->a)); 19905a8e39fbSHong Zhang b->j = bj; 19915a8e39fbSHong Zhang b->i = bi; 19925a8e39fbSHong Zhang for (i = 0; i < n; i++) bdiag[i] += bi[i]; 19935a8e39fbSHong Zhang b->diag = bdiag; 1994f4259b30SLisandro Dalcin b->ilen = NULL; 1995f4259b30SLisandro Dalcin b->imax = NULL; 1996416022c9SBarry Smith b->row = isrow; 1997416022c9SBarry Smith b->col = iscol; 19989566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 19999566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 200082bf6240SBarry Smith b->icol = isicol; 20019566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 2002416022c9SBarry Smith /* In b structure: Free imax, ilen, old a, old j. 20035a8e39fbSHong Zhang Allocate bdiag, solve_work, new a, new j */ 20045a8e39fbSHong Zhang b->maxnz = b->nz = bi[n]; 20052205254eSKarl Rupp 2006719d5645SBarry Smith (fact)->info.factor_mallocs = reallocs; 2007719d5645SBarry Smith (fact)->info.fill_ratio_given = f; 2008719d5645SBarry Smith (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 2009ad04f41aSHong Zhang (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2010ad540459SPierre Jolivet if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 20113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 20129e25ed09SBarry Smith } 2013ff6a9541SJacob Faibussowitsch #endif 2014d5d45c9bSBarry Smith 2015d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 2016d71ae5a4SJacob Faibussowitsch { 20175f44c854SHong Zhang Mat C = B; 20185f44c854SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 20195f44c854SHong Zhang Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 20205f44c854SHong Zhang IS ip = b->row, iip = b->icol; 20215f44c854SHong Zhang const PetscInt *rip, *riip; 20225f44c854SHong Zhang PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp; 20235f44c854SHong Zhang PetscInt *ai = a->i, *aj = a->j; 20245f44c854SHong Zhang PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz; 20255f44c854SHong Zhang MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 2026ace3abfcSBarry Smith PetscBool perm_identity; 2027d90ac83dSHong Zhang FactorShiftCtx sctx; 202898a93161SHong Zhang PetscReal rs; 202998a93161SHong Zhang MatScalar d, *v; 203098a93161SHong Zhang 20315f44c854SHong Zhang PetscFunctionBegin; 203298a93161SHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 20339566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 203498a93161SHong Zhang 2035f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 203698a93161SHong Zhang sctx.shift_top = info->zeropivot; 203798a93161SHong Zhang for (i = 0; i < mbs; i++) { 203898a93161SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 203998a93161SHong Zhang d = (aa)[a->diag[i]]; 204098a93161SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 204198a93161SHong Zhang v = aa + ai[i]; 204298a93161SHong Zhang nz = ai[i + 1] - ai[i]; 20432205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 204498a93161SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 204598a93161SHong Zhang } 204698a93161SHong Zhang sctx.shift_top *= 1.1; 204798a93161SHong Zhang sctx.nshift_max = 5; 204898a93161SHong Zhang sctx.shift_lo = 0.; 204998a93161SHong Zhang sctx.shift_hi = 1.; 205098a93161SHong Zhang } 20515f44c854SHong Zhang 20529566063dSJacob Faibussowitsch PetscCall(ISGetIndices(ip, &rip)); 20539566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iip, &riip)); 20545f44c854SHong Zhang 20555f44c854SHong Zhang /* allocate working arrays 20565f44c854SHong Zhang c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 20575f44c854SHong Zhang il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 20585f44c854SHong Zhang */ 20599566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r)); 20605f44c854SHong Zhang 206198a93161SHong Zhang do { 206207b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 206398a93161SHong Zhang 2064c5546cabSHong Zhang for (i = 0; i < mbs; i++) c2r[i] = mbs; 20652e987568SSatish Balay if (mbs) il[0] = 0; 20665f44c854SHong Zhang 20675f44c854SHong Zhang for (k = 0; k < mbs; k++) { 20685f44c854SHong Zhang /* zero rtmp */ 20695f44c854SHong Zhang nz = bi[k + 1] - bi[k]; 20705f44c854SHong Zhang bjtmp = bj + bi[k]; 20715f44c854SHong Zhang for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 20725f44c854SHong Zhang 20735f44c854SHong Zhang /* load in initial unfactored row */ 20745f44c854SHong Zhang bval = ba + bi[k]; 20759371c9d4SSatish Balay jmin = ai[rip[k]]; 20769371c9d4SSatish Balay jmax = ai[rip[k] + 1]; 20775f44c854SHong Zhang for (j = jmin; j < jmax; j++) { 20785f44c854SHong Zhang col = riip[aj[j]]; 20795f44c854SHong Zhang if (col >= k) { /* only take upper triangular entry */ 20805f44c854SHong Zhang rtmp[col] = aa[j]; 20815f44c854SHong Zhang *bval++ = 0.0; /* for in-place factorization */ 20825f44c854SHong Zhang } 20835f44c854SHong Zhang } 208498a93161SHong Zhang /* shift the diagonal of the matrix: ZeropivotApply() */ 208598a93161SHong Zhang rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 20865f44c854SHong Zhang 20875f44c854SHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 20885f44c854SHong Zhang dk = rtmp[k]; 20895f44c854SHong Zhang i = c2r[k]; /* first row to be added to k_th row */ 20905f44c854SHong Zhang 20915f44c854SHong Zhang while (i < k) { 20925f44c854SHong Zhang nexti = c2r[i]; /* next row to be added to k_th row */ 20935f44c854SHong Zhang 20945f44c854SHong Zhang /* compute multiplier, update diag(k) and U(i,k) */ 20955f44c854SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 20965f44c854SHong Zhang uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */ 20975f44c854SHong Zhang dk += uikdi * ba[ili]; /* update diag[k] */ 20985f44c854SHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 20995f44c854SHong Zhang 21005f44c854SHong Zhang /* add multiple of row i to k-th row */ 21019371c9d4SSatish Balay jmin = ili + 1; 21029371c9d4SSatish Balay jmax = bi[i + 1]; 21035f44c854SHong Zhang if (jmin < jmax) { 21045f44c854SHong Zhang for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 21055f44c854SHong Zhang /* update il and c2r for row i */ 21065f44c854SHong Zhang il[i] = jmin; 21079371c9d4SSatish Balay j = bj[jmin]; 21089371c9d4SSatish Balay c2r[i] = c2r[j]; 21099371c9d4SSatish Balay c2r[j] = i; 21105f44c854SHong Zhang } 21115f44c854SHong Zhang i = nexti; 21125f44c854SHong Zhang } 21135f44c854SHong Zhang 21145f44c854SHong Zhang /* copy data into U(k,:) */ 211598a93161SHong Zhang rs = 0.0; 21169371c9d4SSatish Balay jmin = bi[k]; 21179371c9d4SSatish Balay jmax = bi[k + 1] - 1; 21185f44c854SHong Zhang if (jmin < jmax) { 21195f44c854SHong Zhang for (j = jmin; j < jmax; j++) { 21209371c9d4SSatish Balay col = bj[j]; 21219371c9d4SSatish Balay ba[j] = rtmp[col]; 21229371c9d4SSatish Balay rs += PetscAbsScalar(ba[j]); 21235f44c854SHong Zhang } 21245f44c854SHong Zhang /* add the k-th row into il and c2r */ 21255f44c854SHong Zhang il[k] = jmin; 21269371c9d4SSatish Balay i = bj[jmin]; 21279371c9d4SSatish Balay c2r[k] = c2r[i]; 21289371c9d4SSatish Balay c2r[i] = k; 21295f44c854SHong Zhang } 213098a93161SHong Zhang 213198a93161SHong Zhang /* MatPivotCheck() */ 213298a93161SHong Zhang sctx.rs = rs; 213398a93161SHong Zhang sctx.pv = dk; 21349566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 213507b50cabSHong Zhang if (sctx.newshift) break; 213698a93161SHong Zhang dk = sctx.pv; 213798a93161SHong Zhang 213898a93161SHong Zhang ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */ 213998a93161SHong Zhang } 214007b50cabSHong Zhang } while (sctx.newshift); 21415f44c854SHong Zhang 21429566063dSJacob Faibussowitsch PetscCall(PetscFree3(rtmp, il, c2r)); 21439566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(ip, &rip)); 21449566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iip, &riip)); 21455f44c854SHong Zhang 21469566063dSJacob Faibussowitsch PetscCall(ISIdentity(ip, &perm_identity)); 21475f44c854SHong Zhang if (perm_identity) { 214835233d99SShri Abhyankar B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 214935233d99SShri Abhyankar B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 21502169352eSHong Zhang B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 21512169352eSHong Zhang B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 21525f44c854SHong Zhang } else { 215335233d99SShri Abhyankar B->ops->solve = MatSolve_SeqSBAIJ_1; 215435233d99SShri Abhyankar B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 215535233d99SShri Abhyankar B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 215635233d99SShri Abhyankar B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 21575f44c854SHong Zhang } 21585f44c854SHong Zhang 21595f44c854SHong Zhang C->assembled = PETSC_TRUE; 21605f44c854SHong Zhang C->preallocated = PETSC_TRUE; 21612205254eSKarl Rupp 21629566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->rmap->n)); 216398a93161SHong Zhang 216498a93161SHong Zhang /* MatPivotView() */ 216598a93161SHong Zhang if (sctx.nshift) { 2166f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 21679566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 2168f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 21699566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2170f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 21719566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 217298a93161SHong Zhang } 217398a93161SHong Zhang } 21743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 21755f44c854SHong Zhang } 21765f44c854SHong Zhang 2177d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 2178d71ae5a4SJacob Faibussowitsch { 2179719d5645SBarry Smith Mat C = B; 21800968510aSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 21812ed133dbSHong Zhang Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 21829bfd6278SHong Zhang IS ip = b->row, iip = b->icol; 21835d0c19d7SBarry Smith const PetscInt *rip, *riip; 2184c5546cabSHong Zhang PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp; 21852ed133dbSHong Zhang PetscInt *ai = a->i, *aj = a->j; 21862ed133dbSHong Zhang PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 2187622e2028SHong Zhang MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 2188ace3abfcSBarry Smith PetscBool perm_identity; 21890e95ead3SHong Zhang FactorShiftCtx sctx; 21900e95ead3SHong Zhang PetscReal rs; 21910e95ead3SHong Zhang MatScalar d, *v; 2192d5d45c9bSBarry Smith 2193a6175056SHong Zhang PetscFunctionBegin; 21940e95ead3SHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 21959566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 21960e95ead3SHong Zhang 21970e95ead3SHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 21980e95ead3SHong Zhang sctx.shift_top = info->zeropivot; 21990e95ead3SHong Zhang for (i = 0; i < mbs; i++) { 22000e95ead3SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 22010e95ead3SHong Zhang d = (aa)[a->diag[i]]; 22020e95ead3SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 22030e95ead3SHong Zhang v = aa + ai[i]; 22040e95ead3SHong Zhang nz = ai[i + 1] - ai[i]; 22052205254eSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 22060e95ead3SHong Zhang if (rs > sctx.shift_top) sctx.shift_top = rs; 22070e95ead3SHong Zhang } 22080e95ead3SHong Zhang sctx.shift_top *= 1.1; 22090e95ead3SHong Zhang sctx.nshift_max = 5; 22100e95ead3SHong Zhang sctx.shift_lo = 0.; 22110e95ead3SHong Zhang sctx.shift_hi = 1.; 22120e95ead3SHong Zhang } 2213ee45ca4aSHong Zhang 22149566063dSJacob Faibussowitsch PetscCall(ISGetIndices(ip, &rip)); 22159566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iip, &riip)); 22162ed133dbSHong Zhang 22172ed133dbSHong Zhang /* initialization */ 22189566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 22190e95ead3SHong Zhang 22202ed133dbSHong Zhang do { 222107b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 22220e95ead3SHong Zhang 2223c5546cabSHong Zhang for (i = 0; i < mbs; i++) jl[i] = mbs; 2224c5546cabSHong Zhang il[0] = 0; 22252ed133dbSHong Zhang 22262ed133dbSHong Zhang for (k = 0; k < mbs; k++) { 2227c5546cabSHong Zhang /* zero rtmp */ 2228c5546cabSHong Zhang nz = bi[k + 1] - bi[k]; 2229c5546cabSHong Zhang bjtmp = bj + bi[k]; 2230c5546cabSHong Zhang for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 2231c5546cabSHong Zhang 2232c05c3958SHong Zhang bval = ba + bi[k]; 22332ed133dbSHong Zhang /* initialize k-th row by the perm[k]-th row of A */ 22349371c9d4SSatish Balay jmin = ai[rip[k]]; 22359371c9d4SSatish Balay jmax = ai[rip[k] + 1]; 22362ed133dbSHong Zhang for (j = jmin; j < jmax; j++) { 22379bfd6278SHong Zhang col = riip[aj[j]]; 22382ed133dbSHong Zhang if (col >= k) { /* only take upper triangular entry */ 22392ed133dbSHong Zhang rtmp[col] = aa[j]; 2240c05c3958SHong Zhang *bval++ = 0.0; /* for in-place factorization */ 22412ed133dbSHong Zhang } 22422ed133dbSHong Zhang } 224339e3d630SHong Zhang /* shift the diagonal of the matrix */ 2244540703acSHong Zhang if (sctx.nshift) rtmp[k] += sctx.shift_amount; 22452ed133dbSHong Zhang 22462ed133dbSHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 22472ed133dbSHong Zhang dk = rtmp[k]; 22482ed133dbSHong Zhang i = jl[k]; /* first row to be added to k_th row */ 22492ed133dbSHong Zhang 22502ed133dbSHong Zhang while (i < k) { 22512ed133dbSHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 22522ed133dbSHong Zhang 22532ed133dbSHong Zhang /* compute multiplier, update diag(k) and U(i,k) */ 22542ed133dbSHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 22552ed133dbSHong Zhang uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 22562ed133dbSHong Zhang dk += uikdi * ba[ili]; 22572ed133dbSHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 22582ed133dbSHong Zhang 22592ed133dbSHong Zhang /* add multiple of row i to k-th row */ 22609371c9d4SSatish Balay jmin = ili + 1; 22619371c9d4SSatish Balay jmax = bi[i + 1]; 22622ed133dbSHong Zhang if (jmin < jmax) { 22632ed133dbSHong Zhang for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 22642ed133dbSHong Zhang /* update il and jl for row i */ 22652ed133dbSHong Zhang il[i] = jmin; 22669371c9d4SSatish Balay j = bj[jmin]; 22679371c9d4SSatish Balay jl[i] = jl[j]; 22689371c9d4SSatish Balay jl[j] = i; 22692ed133dbSHong Zhang } 22702ed133dbSHong Zhang i = nexti; 22712ed133dbSHong Zhang } 22722ed133dbSHong Zhang 22730697b6caSHong Zhang /* shift the diagonals when zero pivot is detected */ 22740697b6caSHong Zhang /* compute rs=sum of abs(off-diagonal) */ 22750697b6caSHong Zhang rs = 0.0; 22763c9e8598SHong Zhang jmin = bi[k] + 1; 22773c9e8598SHong Zhang nz = bi[k + 1] - jmin; 22783c9e8598SHong Zhang bcol = bj + jmin; 2279ad540459SPierre Jolivet for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]); 2280540703acSHong Zhang 2281540703acSHong Zhang sctx.rs = rs; 2282540703acSHong Zhang sctx.pv = dk; 22839566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, k)); 228407b50cabSHong Zhang if (sctx.newshift) break; 22850e95ead3SHong Zhang dk = sctx.pv; 22863c9e8598SHong Zhang 22873c9e8598SHong Zhang /* copy data into U(k,:) */ 228839e3d630SHong Zhang ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 22899371c9d4SSatish Balay jmin = bi[k] + 1; 22909371c9d4SSatish Balay jmax = bi[k + 1]; 229139e3d630SHong Zhang if (jmin < jmax) { 229239e3d630SHong Zhang for (j = jmin; j < jmax; j++) { 22939371c9d4SSatish Balay col = bj[j]; 22949371c9d4SSatish Balay ba[j] = rtmp[col]; 22953c9e8598SHong Zhang } 229639e3d630SHong Zhang /* add the k-th row into il and jl */ 22973c9e8598SHong Zhang il[k] = jmin; 22989371c9d4SSatish Balay i = bj[jmin]; 22999371c9d4SSatish Balay jl[k] = jl[i]; 23009371c9d4SSatish Balay jl[i] = k; 23013c9e8598SHong Zhang } 23023c9e8598SHong Zhang } 230307b50cabSHong Zhang } while (sctx.newshift); 23040e95ead3SHong Zhang 23059566063dSJacob Faibussowitsch PetscCall(PetscFree3(rtmp, il, jl)); 23069566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(ip, &rip)); 23079566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iip, &riip)); 2308db4efbfdSBarry Smith 23099566063dSJacob Faibussowitsch PetscCall(ISIdentity(ip, &perm_identity)); 2310db4efbfdSBarry Smith if (perm_identity) { 23110a3c351aSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 23120a3c351aSHong Zhang B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 23130a3c351aSHong Zhang B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 23140a3c351aSHong Zhang B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2315db4efbfdSBarry Smith } else { 23160a3c351aSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 23170a3c351aSHong Zhang B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 23180a3c351aSHong Zhang B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 23190a3c351aSHong Zhang B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2320db4efbfdSBarry Smith } 2321db4efbfdSBarry Smith 23223c9e8598SHong Zhang C->assembled = PETSC_TRUE; 23233c9e8598SHong Zhang C->preallocated = PETSC_TRUE; 23242205254eSKarl Rupp 23259566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->rmap->n)); 2326540703acSHong Zhang if (sctx.nshift) { 2327f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 23289566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2329f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 23309566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 23310697b6caSHong Zhang } 23323c9e8598SHong Zhang } 23333ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 23343c9e8598SHong Zhang } 2335a6175056SHong Zhang 23368a76ed4fSHong Zhang /* 23378a76ed4fSHong Zhang icc() under revised new data structure. 23388a76ed4fSHong Zhang Factored arrays bj and ba are stored as 23398a76ed4fSHong Zhang U(0,:),...,U(i,:),U(n-1,:) 23408a76ed4fSHong Zhang 23418a76ed4fSHong Zhang ui=fact->i is an array of size n+1, in which 23428a76ed4fSHong Zhang ui+ 23438a76ed4fSHong Zhang ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 23448a76ed4fSHong Zhang ui[n]: points to U(n-1,n-1)+1 23458a76ed4fSHong Zhang 23468a76ed4fSHong Zhang udiag=fact->diag is an array of size n,in which 23478a76ed4fSHong Zhang udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 23488a76ed4fSHong Zhang 23498a76ed4fSHong Zhang U(i,:) contains udiag[i] as its last entry, i.e., 23508a76ed4fSHong Zhang U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 23518a76ed4fSHong Zhang */ 23528a76ed4fSHong Zhang 2353d71ae5a4SJacob Faibussowitsch PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2354d71ae5a4SJacob Faibussowitsch { 23550968510aSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2356ed59401aSHong Zhang Mat_SeqSBAIJ *b; 2357ace3abfcSBarry Smith PetscBool perm_identity, missing; 23585f44c854SHong Zhang PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 23595d0c19d7SBarry Smith const PetscInt *rip, *riip; 2360ed59401aSHong Zhang PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 23610298fd71SBarry Smith PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 23625a8e39fbSHong Zhang PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2363ed59401aSHong Zhang PetscReal fill = info->fill, levels = info->levels; 23640298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 23650298fd71SBarry Smith PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 23667a48dd6fSHong Zhang PetscBT lnkbt; 2367b635d36bSHong Zhang IS iperm; 2368a6175056SHong Zhang 2369a6175056SHong Zhang PetscFunctionBegin; 237008401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 23719566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &d)); 237228b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 23739566063dSJacob Faibussowitsch PetscCall(ISIdentity(perm, &perm_identity)); 23749566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 23757a48dd6fSHong Zhang 23769566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ui)); 23779566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &udiag)); 237839e3d630SHong Zhang ui[0] = 0; 237939e3d630SHong Zhang 23808a76ed4fSHong Zhang /* ICC(0) without matrix ordering: simply rearrange column indices */ 2381622e2028SHong Zhang if (!levels && perm_identity) { 23825f44c854SHong Zhang for (i = 0; i < am; i++) { 2383c5546cabSHong Zhang ncols = ai[i + 1] - a->diag[i]; 2384c5546cabSHong Zhang ui[i + 1] = ui[i] + ncols; 2385c5546cabSHong Zhang udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */ 2386dc1e2a3fSHong Zhang } 23879566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2388dc1e2a3fSHong Zhang cols = uj; 2389dc1e2a3fSHong Zhang for (i = 0; i < am; i++) { 23905f44c854SHong Zhang aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2391dc1e2a3fSHong Zhang ncols = ai[i + 1] - a->diag[i] - 1; 23925f44c854SHong Zhang for (j = 0; j < ncols; j++) *cols++ = aj[j]; 2393910cf402Sprj- *cols++ = i; /* diagonal is located as the last entry of U(i,:) */ 23945f44c854SHong Zhang } 23955f44c854SHong Zhang } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 23969566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iperm, &riip)); 23979566063dSJacob Faibussowitsch PetscCall(ISGetIndices(perm, &rip)); 23985f44c854SHong Zhang 23995f44c854SHong Zhang /* initialization */ 24009566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ajtmp)); 24015f44c854SHong Zhang 24025f44c854SHong Zhang /* jl: linked list for storing indices of the pivot rows 24035f44c854SHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 24049566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 24055f44c854SHong Zhang for (i = 0; i < am; i++) { 24069371c9d4SSatish Balay jl[i] = am; 24079371c9d4SSatish Balay il[i] = 0; 24085f44c854SHong Zhang } 24095f44c854SHong Zhang 24105f44c854SHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 24115f44c854SHong Zhang nlnk = am + 1; 24129566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 24135f44c854SHong Zhang 241495b5ac22SHong Zhang /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 24159566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 24165f44c854SHong Zhang current_space = free_space; 24179566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl)); 24185f44c854SHong Zhang current_space_lvl = free_space_lvl; 24195f44c854SHong Zhang 24205f44c854SHong Zhang for (k = 0; k < am; k++) { /* for each active row k */ 24215f44c854SHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 24225f44c854SHong Zhang nzk = 0; 24235f44c854SHong Zhang ncols = ai[rip[k] + 1] - ai[rip[k]]; 242428b400f6SJacob Faibussowitsch PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 24255f44c854SHong Zhang ncols_upper = 0; 24265f44c854SHong Zhang for (j = 0; j < ncols; j++) { 24275f44c854SHong Zhang i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 24285f44c854SHong Zhang if (riip[i] >= k) { /* only take upper triangular entry */ 24295f44c854SHong Zhang ajtmp[ncols_upper] = i; 24305f44c854SHong Zhang ncols_upper++; 24315f44c854SHong Zhang } 24325f44c854SHong Zhang } 24339566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 24345f44c854SHong Zhang nzk += nlnk; 24355f44c854SHong Zhang 24365f44c854SHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 24375f44c854SHong Zhang prow = jl[k]; /* 1st pivot row */ 24385f44c854SHong Zhang 24395f44c854SHong Zhang while (prow < k) { 24405f44c854SHong Zhang nextprow = jl[prow]; 24415f44c854SHong Zhang 24425f44c854SHong Zhang /* merge prow into k-th row */ 24435f44c854SHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 24445f44c854SHong Zhang jmax = ui[prow + 1]; 24455f44c854SHong Zhang ncols = jmax - jmin; 24465f44c854SHong Zhang i = jmin - ui[prow]; 24475f44c854SHong Zhang cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 24485f44c854SHong Zhang uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 24495f44c854SHong Zhang j = *(uj - 1); 24509566063dSJacob Faibussowitsch PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 24515f44c854SHong Zhang nzk += nlnk; 24525f44c854SHong Zhang 24535f44c854SHong Zhang /* update il and jl for prow */ 24545f44c854SHong Zhang if (jmin < jmax) { 24555f44c854SHong Zhang il[prow] = jmin; 24569371c9d4SSatish Balay j = *cols; 24579371c9d4SSatish Balay jl[prow] = jl[j]; 24589371c9d4SSatish Balay jl[j] = prow; 24595f44c854SHong Zhang } 24605f44c854SHong Zhang prow = nextprow; 24615f44c854SHong Zhang } 24625f44c854SHong Zhang 24635f44c854SHong Zhang /* if free space is not available, make more free space */ 24645f44c854SHong Zhang if (current_space->local_remaining < nzk) { 24655f44c854SHong Zhang i = am - k + 1; /* num of unfactored rows */ 2466f91af8c7SBarry Smith i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 24679566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 24689566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 24695f44c854SHong Zhang reallocs++; 24705f44c854SHong Zhang } 24715f44c854SHong Zhang 24725f44c854SHong Zhang /* copy data into free_space and free_space_lvl, then initialize lnk */ 247308401ef6SPierre Jolivet PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 24749566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 24755f44c854SHong Zhang 24765f44c854SHong Zhang /* add the k-th row into il and jl */ 24775f44c854SHong Zhang if (nzk > 1) { 24785f44c854SHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 24799371c9d4SSatish Balay jl[k] = jl[i]; 24809371c9d4SSatish Balay jl[i] = k; 24815f44c854SHong Zhang il[k] = ui[k] + 1; 24825f44c854SHong Zhang } 24835f44c854SHong Zhang uj_ptr[k] = current_space->array; 24845f44c854SHong Zhang uj_lvl_ptr[k] = current_space_lvl->array; 24855f44c854SHong Zhang 24865f44c854SHong Zhang current_space->array += nzk; 24875f44c854SHong Zhang current_space->local_used += nzk; 24885f44c854SHong Zhang current_space->local_remaining -= nzk; 24895f44c854SHong Zhang 24905f44c854SHong Zhang current_space_lvl->array += nzk; 24915f44c854SHong Zhang current_space_lvl->local_used += nzk; 24925f44c854SHong Zhang current_space_lvl->local_remaining -= nzk; 24935f44c854SHong Zhang 24945f44c854SHong Zhang ui[k + 1] = ui[k] + nzk; 24955f44c854SHong Zhang } 24965f44c854SHong Zhang 24979566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(perm, &rip)); 24989566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iperm, &riip)); 24999566063dSJacob Faibussowitsch PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 25009566063dSJacob Faibussowitsch PetscCall(PetscFree(ajtmp)); 25015f44c854SHong Zhang 25029263d837SHong Zhang /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 25039566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 25049566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 25059566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 25069566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 25075f44c854SHong Zhang 25085f44c854SHong Zhang } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 25095f44c854SHong Zhang 25105f44c854SHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 25115f44c854SHong Zhang b = (Mat_SeqSBAIJ *)(fact)->data; 25125f44c854SHong Zhang b->singlemalloc = PETSC_FALSE; 25132205254eSKarl Rupp 25149566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 25152205254eSKarl Rupp 25165f44c854SHong Zhang b->j = uj; 25175f44c854SHong Zhang b->i = ui; 25185f44c854SHong Zhang b->diag = udiag; 25197f53bb6cSHong Zhang b->free_diag = PETSC_TRUE; 2520f4259b30SLisandro Dalcin b->ilen = NULL; 2521f4259b30SLisandro Dalcin b->imax = NULL; 25225f44c854SHong Zhang b->row = perm; 25235f44c854SHong Zhang b->col = perm; 25249566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 25259566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 25265f44c854SHong Zhang b->icol = iperm; 25275f44c854SHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 25282205254eSKarl Rupp 25299566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 25302205254eSKarl Rupp 25315f44c854SHong Zhang b->maxnz = b->nz = ui[am]; 25325f44c854SHong Zhang b->free_a = PETSC_TRUE; 25335f44c854SHong Zhang b->free_ij = PETSC_TRUE; 25345f44c854SHong Zhang 2535f284f12aSHong Zhang fact->info.factor_mallocs = reallocs; 2536f284f12aSHong Zhang fact->info.fill_ratio_given = fill; 25375f44c854SHong Zhang if (ai[am] != 0) { 25386a69fef8SHong Zhang /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 253995b5ac22SHong Zhang fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 25405f44c854SHong Zhang } else { 2541f284f12aSHong Zhang fact->info.fill_ratio_needed = 0.0; 25425f44c854SHong Zhang } 25439263d837SHong Zhang #if defined(PETSC_USE_INFO) 25449263d837SHong Zhang if (ai[am] != 0) { 25459263d837SHong Zhang PetscReal af = fact->info.fill_ratio_needed; 25469566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 25479566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 25489566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 25499263d837SHong Zhang } else { 25509566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 25519263d837SHong Zhang } 25529263d837SHong Zhang #endif 255335233d99SShri Abhyankar fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 25543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25555f44c854SHong Zhang } 25565f44c854SHong Zhang 2557ff6a9541SJacob Faibussowitsch #if 0 2558ff6a9541SJacob Faibussowitsch // unused 2559ff6a9541SJacob Faibussowitsch static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2560d71ae5a4SJacob Faibussowitsch { 25615f44c854SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 25625f44c854SHong Zhang Mat_SeqSBAIJ *b; 2563ace3abfcSBarry Smith PetscBool perm_identity, missing; 25645f44c854SHong Zhang PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 25655f44c854SHong Zhang const PetscInt *rip, *riip; 25665f44c854SHong Zhang PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 25670298fd71SBarry Smith PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 25685f44c854SHong Zhang PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 25695f44c854SHong Zhang PetscReal fill = info->fill, levels = info->levels; 25700298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 25710298fd71SBarry Smith PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 25725f44c854SHong Zhang PetscBT lnkbt; 25735f44c854SHong Zhang IS iperm; 25745f44c854SHong Zhang 25755f44c854SHong Zhang PetscFunctionBegin; 257608401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 25779566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &d)); 257828b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 25799566063dSJacob Faibussowitsch PetscCall(ISIdentity(perm, &perm_identity)); 25809566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 25815f44c854SHong Zhang 25829566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ui)); 25839566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &udiag)); 25845f44c854SHong Zhang ui[0] = 0; 25855f44c854SHong Zhang 25865f44c854SHong Zhang /* ICC(0) without matrix ordering: simply copies fill pattern */ 25875f44c854SHong Zhang if (!levels && perm_identity) { 25885f44c854SHong Zhang for (i = 0; i < am; i++) { 25895f44c854SHong Zhang ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i]; 25905f44c854SHong Zhang udiag[i] = ui[i]; 2591ed59401aSHong Zhang } 25929566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 259339e3d630SHong Zhang cols = uj; 2594ed59401aSHong Zhang for (i = 0; i < am; i++) { 2595ed59401aSHong Zhang aj = a->j + a->diag[i]; 259639e3d630SHong Zhang ncols = ui[i + 1] - ui[i]; 259739e3d630SHong Zhang for (j = 0; j < ncols; j++) *cols++ = *aj++; 2598ed59401aSHong Zhang } 259939e3d630SHong Zhang } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 26009566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iperm, &riip)); 26019566063dSJacob Faibussowitsch PetscCall(ISGetIndices(perm, &rip)); 2602b635d36bSHong Zhang 26037a48dd6fSHong Zhang /* initialization */ 26049566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ajtmp)); 26057a48dd6fSHong Zhang 26067a48dd6fSHong Zhang /* jl: linked list for storing indices of the pivot rows 26077a48dd6fSHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 26089566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 26097a48dd6fSHong Zhang for (i = 0; i < am; i++) { 26109371c9d4SSatish Balay jl[i] = am; 26119371c9d4SSatish Balay il[i] = 0; 26127a48dd6fSHong Zhang } 26137a48dd6fSHong Zhang 26147a48dd6fSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 26157a48dd6fSHong Zhang nlnk = am + 1; 26169566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 26177a48dd6fSHong Zhang 26187a48dd6fSHong Zhang /* initial FreeSpace size is fill*(ai[am]+1) */ 26199566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 26207a48dd6fSHong Zhang current_space = free_space; 26219566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl)); 26227a48dd6fSHong Zhang current_space_lvl = free_space_lvl; 26237a48dd6fSHong Zhang 26247a48dd6fSHong Zhang for (k = 0; k < am; k++) { /* for each active row k */ 26257a48dd6fSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 26267a48dd6fSHong Zhang nzk = 0; 2627622e2028SHong Zhang ncols = ai[rip[k] + 1] - ai[rip[k]]; 262828b400f6SJacob Faibussowitsch PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2629622e2028SHong Zhang ncols_upper = 0; 2630622e2028SHong Zhang for (j = 0; j < ncols; j++) { 2631b635d36bSHong Zhang i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2632b635d36bSHong Zhang if (riip[i] >= k) { /* only take upper triangular entry */ 26335a8e39fbSHong Zhang ajtmp[ncols_upper] = i; 2634622e2028SHong Zhang ncols_upper++; 2635622e2028SHong Zhang } 2636622e2028SHong Zhang } 26379566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 26387a48dd6fSHong Zhang nzk += nlnk; 26397a48dd6fSHong Zhang 26407a48dd6fSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 26417a48dd6fSHong Zhang prow = jl[k]; /* 1st pivot row */ 26427a48dd6fSHong Zhang 26437a48dd6fSHong Zhang while (prow < k) { 26447a48dd6fSHong Zhang nextprow = jl[prow]; 26457a48dd6fSHong Zhang 26467a48dd6fSHong Zhang /* merge prow into k-th row */ 26477a48dd6fSHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 26487a48dd6fSHong Zhang jmax = ui[prow + 1]; 26497a48dd6fSHong Zhang ncols = jmax - jmin; 2650ed59401aSHong Zhang i = jmin - ui[prow]; 2651ed59401aSHong Zhang cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 26525a8e39fbSHong Zhang uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 26535a8e39fbSHong Zhang j = *(uj - 1); 26549566063dSJacob Faibussowitsch PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 26557a48dd6fSHong Zhang nzk += nlnk; 26567a48dd6fSHong Zhang 26577a48dd6fSHong Zhang /* update il and jl for prow */ 26587a48dd6fSHong Zhang if (jmin < jmax) { 26597a48dd6fSHong Zhang il[prow] = jmin; 26609371c9d4SSatish Balay j = *cols; 26619371c9d4SSatish Balay jl[prow] = jl[j]; 26629371c9d4SSatish Balay jl[j] = prow; 26637a48dd6fSHong Zhang } 26647a48dd6fSHong Zhang prow = nextprow; 26657a48dd6fSHong Zhang } 26667a48dd6fSHong Zhang 26677a48dd6fSHong Zhang /* if free space is not available, make more free space */ 26687a48dd6fSHong Zhang if (current_space->local_remaining < nzk) { 26697a48dd6fSHong Zhang i = am - k + 1; /* num of unfactored rows */ 2670f91af8c7SBarry Smith i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 26719566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 26729566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 26737a48dd6fSHong Zhang reallocs++; 26747a48dd6fSHong Zhang } 26757a48dd6fSHong Zhang 26767a48dd6fSHong Zhang /* copy data into free_space and free_space_lvl, then initialize lnk */ 267728b400f6SJacob Faibussowitsch PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 26789566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 26797a48dd6fSHong Zhang 26807a48dd6fSHong Zhang /* add the k-th row into il and jl */ 268139e3d630SHong Zhang if (nzk > 1) { 26827a48dd6fSHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 26839371c9d4SSatish Balay jl[k] = jl[i]; 26849371c9d4SSatish Balay jl[i] = k; 26857a48dd6fSHong Zhang il[k] = ui[k] + 1; 26867a48dd6fSHong Zhang } 26877a48dd6fSHong Zhang uj_ptr[k] = current_space->array; 26887a48dd6fSHong Zhang uj_lvl_ptr[k] = current_space_lvl->array; 26897a48dd6fSHong Zhang 26907a48dd6fSHong Zhang current_space->array += nzk; 26917a48dd6fSHong Zhang current_space->local_used += nzk; 26927a48dd6fSHong Zhang current_space->local_remaining -= nzk; 26937a48dd6fSHong Zhang 26947a48dd6fSHong Zhang current_space_lvl->array += nzk; 26957a48dd6fSHong Zhang current_space_lvl->local_used += nzk; 26967a48dd6fSHong Zhang current_space_lvl->local_remaining -= nzk; 26977a48dd6fSHong Zhang 26987a48dd6fSHong Zhang ui[k + 1] = ui[k] + nzk; 26997a48dd6fSHong Zhang } 27007a48dd6fSHong Zhang 27016cf91177SBarry Smith #if defined(PETSC_USE_INFO) 27027a48dd6fSHong Zhang if (ai[am] != 0) { 270339e3d630SHong Zhang PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]); 27049566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 27059566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 27069566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 27077a48dd6fSHong Zhang } else { 27089566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 27097a48dd6fSHong Zhang } 271063ba0a88SBarry Smith #endif 27117a48dd6fSHong Zhang 27129566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(perm, &rip)); 27139566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iperm, &riip)); 27149566063dSJacob Faibussowitsch PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 27159566063dSJacob Faibussowitsch PetscCall(PetscFree(ajtmp)); 27167a48dd6fSHong Zhang 27177a48dd6fSHong Zhang /* destroy list of free space and other temporary array(s) */ 27189566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 27199566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 27209566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 27219566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 27227a48dd6fSHong Zhang 272339e3d630SHong Zhang } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 272439e3d630SHong Zhang 27257a48dd6fSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 27267a48dd6fSHong Zhang 2727f284f12aSHong Zhang b = (Mat_SeqSBAIJ *)fact->data; 27287a48dd6fSHong Zhang b->singlemalloc = PETSC_FALSE; 27292205254eSKarl Rupp 27309566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 27312205254eSKarl Rupp 27327a48dd6fSHong Zhang b->j = uj; 27337a48dd6fSHong Zhang b->i = ui; 27345f44c854SHong Zhang b->diag = udiag; 27357f53bb6cSHong Zhang b->free_diag = PETSC_TRUE; 2736f4259b30SLisandro Dalcin b->ilen = NULL; 2737f4259b30SLisandro Dalcin b->imax = NULL; 27387a48dd6fSHong Zhang b->row = perm; 2739b635d36bSHong Zhang b->col = perm; 27402205254eSKarl Rupp 27419566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 27429566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 27432205254eSKarl Rupp 2744b635d36bSHong Zhang b->icol = iperm; 27457a48dd6fSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 27469566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 27477a48dd6fSHong Zhang b->maxnz = b->nz = ui[am]; 2748e6b907acSBarry Smith b->free_a = PETSC_TRUE; 2749e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 27507a48dd6fSHong Zhang 2751f284f12aSHong Zhang fact->info.factor_mallocs = reallocs; 2752f284f12aSHong Zhang fact->info.fill_ratio_given = fill; 27537a48dd6fSHong Zhang if (ai[am] != 0) { 2754f284f12aSHong Zhang fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]); 27557a48dd6fSHong Zhang } else { 2756f284f12aSHong Zhang fact->info.fill_ratio_needed = 0.0; 27577a48dd6fSHong Zhang } 27580a3c351aSHong Zhang fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 27593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2760a6175056SHong Zhang } 2761ff6a9541SJacob Faibussowitsch #endif 2762d5d45c9bSBarry Smith 2763d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2764d71ae5a4SJacob Faibussowitsch { 27650be760fbSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 27660be760fbSHong Zhang Mat_SeqSBAIJ *b; 27679186b105SHong Zhang PetscBool perm_identity, missing; 27680be760fbSHong Zhang PetscReal fill = info->fill; 27690be760fbSHong Zhang const PetscInt *rip, *riip; 27700be760fbSHong Zhang PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 27710be760fbSHong Zhang PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 27720be760fbSHong Zhang PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag; 27730298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 27740be760fbSHong Zhang PetscBT lnkbt; 27750be760fbSHong Zhang IS iperm; 27760be760fbSHong Zhang 27770be760fbSHong Zhang PetscFunctionBegin; 277808401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 27799566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 278028b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 27819186b105SHong Zhang 27820be760fbSHong Zhang /* check whether perm is the identity mapping */ 27839566063dSJacob Faibussowitsch PetscCall(ISIdentity(perm, &perm_identity)); 27849566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 27859566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iperm, &riip)); 27869566063dSJacob Faibussowitsch PetscCall(ISGetIndices(perm, &rip)); 27870be760fbSHong Zhang 27880be760fbSHong Zhang /* initialization */ 27899566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ui)); 27909566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &udiag)); 27910be760fbSHong Zhang ui[0] = 0; 27920be760fbSHong Zhang 27930be760fbSHong Zhang /* jl: linked list for storing indices of the pivot rows 27940be760fbSHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 27959566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 27960be760fbSHong Zhang for (i = 0; i < am; i++) { 27979371c9d4SSatish Balay jl[i] = am; 27989371c9d4SSatish Balay il[i] = 0; 27990be760fbSHong Zhang } 28000be760fbSHong Zhang 28010be760fbSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 28020be760fbSHong Zhang nlnk = am + 1; 28039566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 28040be760fbSHong Zhang 280595b5ac22SHong Zhang /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 28069566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 28070be760fbSHong Zhang current_space = free_space; 28080be760fbSHong Zhang 28090be760fbSHong Zhang for (k = 0; k < am; k++) { /* for each active row k */ 28100be760fbSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 28110be760fbSHong Zhang nzk = 0; 28120be760fbSHong Zhang ncols = ai[rip[k] + 1] - ai[rip[k]]; 281328b400f6SJacob Faibussowitsch PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 28140be760fbSHong Zhang ncols_upper = 0; 28150be760fbSHong Zhang for (j = 0; j < ncols; j++) { 28160be760fbSHong Zhang i = riip[*(aj + ai[rip[k]] + j)]; 28170be760fbSHong Zhang if (i >= k) { /* only take upper triangular entry */ 28180be760fbSHong Zhang cols[ncols_upper] = i; 28190be760fbSHong Zhang ncols_upper++; 28200be760fbSHong Zhang } 28210be760fbSHong Zhang } 28229566063dSJacob Faibussowitsch PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 28230be760fbSHong Zhang nzk += nlnk; 28240be760fbSHong Zhang 28250be760fbSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 28260be760fbSHong Zhang prow = jl[k]; /* 1st pivot row */ 28270be760fbSHong Zhang 28280be760fbSHong Zhang while (prow < k) { 28290be760fbSHong Zhang nextprow = jl[prow]; 28300be760fbSHong Zhang /* merge prow into k-th row */ 28310be760fbSHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 28320be760fbSHong Zhang jmax = ui[prow + 1]; 28330be760fbSHong Zhang ncols = jmax - jmin; 28340be760fbSHong Zhang uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 28359566063dSJacob Faibussowitsch PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 28360be760fbSHong Zhang nzk += nlnk; 28370be760fbSHong Zhang 28380be760fbSHong Zhang /* update il and jl for prow */ 28390be760fbSHong Zhang if (jmin < jmax) { 28400be760fbSHong Zhang il[prow] = jmin; 28412205254eSKarl Rupp j = *uj_ptr; 28422205254eSKarl Rupp jl[prow] = jl[j]; 28432205254eSKarl Rupp jl[j] = prow; 28440be760fbSHong Zhang } 28450be760fbSHong Zhang prow = nextprow; 28460be760fbSHong Zhang } 28470be760fbSHong Zhang 28480be760fbSHong Zhang /* if free space is not available, make more free space */ 28490be760fbSHong Zhang if (current_space->local_remaining < nzk) { 28500be760fbSHong Zhang i = am - k + 1; /* num of unfactored rows */ 2851f91af8c7SBarry Smith i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 28529566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 28530be760fbSHong Zhang reallocs++; 28540be760fbSHong Zhang } 28550be760fbSHong Zhang 28560be760fbSHong Zhang /* copy data into free space, then initialize lnk */ 28579566063dSJacob Faibussowitsch PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 28580be760fbSHong Zhang 28590be760fbSHong Zhang /* add the k-th row into il and jl */ 28607b056e98SHong Zhang if (nzk > 1) { 28610be760fbSHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 28629371c9d4SSatish Balay jl[k] = jl[i]; 28639371c9d4SSatish Balay jl[i] = k; 28640be760fbSHong Zhang il[k] = ui[k] + 1; 28650be760fbSHong Zhang } 28660be760fbSHong Zhang ui_ptr[k] = current_space->array; 28672205254eSKarl Rupp 28680be760fbSHong Zhang current_space->array += nzk; 28690be760fbSHong Zhang current_space->local_used += nzk; 28700be760fbSHong Zhang current_space->local_remaining -= nzk; 28710be760fbSHong Zhang 28720be760fbSHong Zhang ui[k + 1] = ui[k] + nzk; 28730be760fbSHong Zhang } 28740be760fbSHong Zhang 28759566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(perm, &rip)); 28769566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iperm, &riip)); 28779566063dSJacob Faibussowitsch PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 28780be760fbSHong Zhang 28799263d837SHong Zhang /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 28809566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 28819566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 28829566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 28830be760fbSHong Zhang 28840be760fbSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 28850be760fbSHong Zhang 2886f284f12aSHong Zhang b = (Mat_SeqSBAIJ *)fact->data; 28870be760fbSHong Zhang b->singlemalloc = PETSC_FALSE; 28880be760fbSHong Zhang b->free_a = PETSC_TRUE; 28890be760fbSHong Zhang b->free_ij = PETSC_TRUE; 28902205254eSKarl Rupp 28919566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 28922205254eSKarl Rupp 28930be760fbSHong Zhang b->j = uj; 28940be760fbSHong Zhang b->i = ui; 28950be760fbSHong Zhang b->diag = udiag; 28960be760fbSHong Zhang b->free_diag = PETSC_TRUE; 2897f4259b30SLisandro Dalcin b->ilen = NULL; 2898f4259b30SLisandro Dalcin b->imax = NULL; 28990be760fbSHong Zhang b->row = perm; 29000be760fbSHong Zhang b->col = perm; 290126fbe8dcSKarl Rupp 29029566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 29039566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 29042205254eSKarl Rupp 29050be760fbSHong Zhang b->icol = iperm; 29060be760fbSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 290726fbe8dcSKarl Rupp 29089566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 29092205254eSKarl Rupp 29100be760fbSHong Zhang b->maxnz = b->nz = ui[am]; 29110be760fbSHong Zhang 2912f284f12aSHong Zhang fact->info.factor_mallocs = reallocs; 2913f284f12aSHong Zhang fact->info.fill_ratio_given = fill; 29140be760fbSHong Zhang if (ai[am] != 0) { 29156a69fef8SHong Zhang /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 291695b5ac22SHong Zhang fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 29170be760fbSHong Zhang } else { 2918f284f12aSHong Zhang fact->info.fill_ratio_needed = 0.0; 29190be760fbSHong Zhang } 29209263d837SHong Zhang #if defined(PETSC_USE_INFO) 29219263d837SHong Zhang if (ai[am] != 0) { 29229263d837SHong Zhang PetscReal af = fact->info.fill_ratio_needed; 29239566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 29249566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 29259566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 29269263d837SHong Zhang } else { 29279566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 29289263d837SHong Zhang } 29299263d837SHong Zhang #endif 293035233d99SShri Abhyankar fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 29313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 29320be760fbSHong Zhang } 29330be760fbSHong Zhang 2934ff6a9541SJacob Faibussowitsch #if 0 2935ff6a9541SJacob Faibussowitsch // unused 2936ff6a9541SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2937d71ae5a4SJacob Faibussowitsch { 2938f76d2b81SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 293910c27e3dSHong Zhang Mat_SeqSBAIJ *b; 29409186b105SHong Zhang PetscBool perm_identity, missing; 294110c27e3dSHong Zhang PetscReal fill = info->fill; 29425d0c19d7SBarry Smith const PetscInt *rip, *riip; 29435d0c19d7SBarry Smith PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 294410c27e3dSHong Zhang PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 29452ed133dbSHong Zhang PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr; 29460298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 294710c27e3dSHong Zhang PetscBT lnkbt; 29482ed133dbSHong Zhang IS iperm; 2949f76d2b81SHong Zhang 2950f76d2b81SHong Zhang PetscFunctionBegin; 295108401ef6SPierre Jolivet PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 29529566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 295328b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 29549186b105SHong Zhang 29552ed133dbSHong Zhang /* check whether perm is the identity mapping */ 29569566063dSJacob Faibussowitsch PetscCall(ISIdentity(perm, &perm_identity)); 29579566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 29589566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iperm, &riip)); 29599566063dSJacob Faibussowitsch PetscCall(ISGetIndices(perm, &rip)); 296010c27e3dSHong Zhang 296110c27e3dSHong Zhang /* initialization */ 29629566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ui)); 296310c27e3dSHong Zhang ui[0] = 0; 296410c27e3dSHong Zhang 296510c27e3dSHong Zhang /* jl: linked list for storing indices of the pivot rows 29661393bc97SHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 29679566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 29681393bc97SHong Zhang for (i = 0; i < am; i++) { 29699371c9d4SSatish Balay jl[i] = am; 29709371c9d4SSatish Balay il[i] = 0; 2971f76d2b81SHong Zhang } 297210c27e3dSHong Zhang 297310c27e3dSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 29741393bc97SHong Zhang nlnk = am + 1; 29759566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 297610c27e3dSHong Zhang 29771393bc97SHong Zhang /* initial FreeSpace size is fill*(ai[am]+1) */ 29789566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 297910c27e3dSHong Zhang current_space = free_space; 298010c27e3dSHong Zhang 29811393bc97SHong Zhang for (k = 0; k < am; k++) { /* for each active row k */ 298210c27e3dSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 298310c27e3dSHong Zhang nzk = 0; 29842ed133dbSHong Zhang ncols = ai[rip[k] + 1] - ai[rip[k]]; 298594bad497SJacob Faibussowitsch PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 29862ed133dbSHong Zhang ncols_upper = 0; 29872ed133dbSHong Zhang for (j = 0; j < ncols; j++) { 29889bfd6278SHong Zhang i = riip[*(aj + ai[rip[k]] + j)]; 29892ed133dbSHong Zhang if (i >= k) { /* only take upper triangular entry */ 29902ed133dbSHong Zhang cols[ncols_upper] = i; 29912ed133dbSHong Zhang ncols_upper++; 29922ed133dbSHong Zhang } 29932ed133dbSHong Zhang } 29949566063dSJacob Faibussowitsch PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 299510c27e3dSHong Zhang nzk += nlnk; 299610c27e3dSHong Zhang 299710c27e3dSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 299810c27e3dSHong Zhang prow = jl[k]; /* 1st pivot row */ 299910c27e3dSHong Zhang 300010c27e3dSHong Zhang while (prow < k) { 300110c27e3dSHong Zhang nextprow = jl[prow]; 300210c27e3dSHong Zhang /* merge prow into k-th row */ 30031393bc97SHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 300410c27e3dSHong Zhang jmax = ui[prow + 1]; 300510c27e3dSHong Zhang ncols = jmax - jmin; 30061393bc97SHong Zhang uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 30079566063dSJacob Faibussowitsch PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 300810c27e3dSHong Zhang nzk += nlnk; 300910c27e3dSHong Zhang 301010c27e3dSHong Zhang /* update il and jl for prow */ 301110c27e3dSHong Zhang if (jmin < jmax) { 301210c27e3dSHong Zhang il[prow] = jmin; 30139371c9d4SSatish Balay j = *uj_ptr; 30149371c9d4SSatish Balay jl[prow] = jl[j]; 30159371c9d4SSatish Balay jl[j] = prow; 301610c27e3dSHong Zhang } 301710c27e3dSHong Zhang prow = nextprow; 301810c27e3dSHong Zhang } 301910c27e3dSHong Zhang 302010c27e3dSHong Zhang /* if free space is not available, make more free space */ 302110c27e3dSHong Zhang if (current_space->local_remaining < nzk) { 30221393bc97SHong Zhang i = am - k + 1; /* num of unfactored rows */ 302310c27e3dSHong Zhang i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 30249566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 302510c27e3dSHong Zhang reallocs++; 302610c27e3dSHong Zhang } 302710c27e3dSHong Zhang 302810c27e3dSHong Zhang /* copy data into free space, then initialize lnk */ 30299566063dSJacob Faibussowitsch PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 303010c27e3dSHong Zhang 303110c27e3dSHong Zhang /* add the k-th row into il and jl */ 303210c27e3dSHong Zhang if (nzk - 1 > 0) { 30331393bc97SHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 30349371c9d4SSatish Balay jl[k] = jl[i]; 30359371c9d4SSatish Balay jl[i] = k; 303610c27e3dSHong Zhang il[k] = ui[k] + 1; 303710c27e3dSHong Zhang } 30382ed133dbSHong Zhang ui_ptr[k] = current_space->array; 30392205254eSKarl Rupp 304010c27e3dSHong Zhang current_space->array += nzk; 304110c27e3dSHong Zhang current_space->local_used += nzk; 304210c27e3dSHong Zhang current_space->local_remaining -= nzk; 304310c27e3dSHong Zhang 304410c27e3dSHong Zhang ui[k + 1] = ui[k] + nzk; 304510c27e3dSHong Zhang } 304610c27e3dSHong Zhang 30476cf91177SBarry Smith #if defined(PETSC_USE_INFO) 30481393bc97SHong Zhang if (ai[am] != 0) { 30491393bc97SHong Zhang PetscReal af = (PetscReal)(ui[am]) / ((PetscReal)ai[am]); 30509566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 30519566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 30529566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 305310c27e3dSHong Zhang } else { 30549566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 305510c27e3dSHong Zhang } 305663ba0a88SBarry Smith #endif 305710c27e3dSHong Zhang 30589566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(perm, &rip)); 30599566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iperm, &riip)); 30609566063dSJacob Faibussowitsch PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 306110c27e3dSHong Zhang 306210c27e3dSHong Zhang /* destroy list of free space and other temporary array(s) */ 30639566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 30649566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 30659566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 306610c27e3dSHong Zhang 306710c27e3dSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 306810c27e3dSHong Zhang 3069f284f12aSHong Zhang b = (Mat_SeqSBAIJ *)fact->data; 307010c27e3dSHong Zhang b->singlemalloc = PETSC_FALSE; 3071e6b907acSBarry Smith b->free_a = PETSC_TRUE; 3072e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 30732205254eSKarl Rupp 30749566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 30752205254eSKarl Rupp 307610c27e3dSHong Zhang b->j = uj; 307710c27e3dSHong Zhang b->i = ui; 3078f4259b30SLisandro Dalcin b->diag = NULL; 3079f4259b30SLisandro Dalcin b->ilen = NULL; 3080f4259b30SLisandro Dalcin b->imax = NULL; 308110c27e3dSHong Zhang b->row = perm; 30829bfd6278SHong Zhang b->col = perm; 30832205254eSKarl Rupp 30849566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 30859566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm)); 30862205254eSKarl Rupp 30879bfd6278SHong Zhang b->icol = iperm; 308810c27e3dSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 30892205254eSKarl Rupp 30909566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 30911393bc97SHong Zhang b->maxnz = b->nz = ui[am]; 309210c27e3dSHong Zhang 3093f284f12aSHong Zhang fact->info.factor_mallocs = reallocs; 3094f284f12aSHong Zhang fact->info.fill_ratio_given = fill; 30951393bc97SHong Zhang if (ai[am] != 0) { 3096f284f12aSHong Zhang fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]); 309710c27e3dSHong Zhang } else { 3098f284f12aSHong Zhang fact->info.fill_ratio_needed = 0.0; 309910c27e3dSHong Zhang } 31000a3c351aSHong Zhang fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 31013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3102f76d2b81SHong Zhang } 3103ff6a9541SJacob Faibussowitsch #endif 3104599ef60dSHong Zhang 3105d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx) 3106d71ae5a4SJacob Faibussowitsch { 3107813a1f2bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3108813a1f2bSShri Abhyankar PetscInt n = A->rmap->n; 3109813a1f2bSShri Abhyankar const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi; 3110813a1f2bSShri Abhyankar PetscScalar *x, sum; 3111813a1f2bSShri Abhyankar const PetscScalar *b; 3112813a1f2bSShri Abhyankar const MatScalar *aa = a->a, *v; 3113813a1f2bSShri Abhyankar PetscInt i, nz; 3114813a1f2bSShri Abhyankar 3115813a1f2bSShri Abhyankar PetscFunctionBegin; 31163ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 3117813a1f2bSShri Abhyankar 31189566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 31199566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 3120813a1f2bSShri Abhyankar 3121813a1f2bSShri Abhyankar /* forward solve the lower triangular */ 3122813a1f2bSShri Abhyankar x[0] = b[0]; 3123813a1f2bSShri Abhyankar v = aa; 3124813a1f2bSShri Abhyankar vi = aj; 3125813a1f2bSShri Abhyankar for (i = 1; i < n; i++) { 3126813a1f2bSShri Abhyankar nz = ai[i + 1] - ai[i]; 3127813a1f2bSShri Abhyankar sum = b[i]; 3128813a1f2bSShri Abhyankar PetscSparseDenseMinusDot(sum, x, v, vi, nz); 3129813a1f2bSShri Abhyankar v += nz; 3130813a1f2bSShri Abhyankar vi += nz; 3131813a1f2bSShri Abhyankar x[i] = sum; 3132813a1f2bSShri Abhyankar } 3133813a1f2bSShri Abhyankar 3134813a1f2bSShri Abhyankar /* backward solve the upper triangular */ 313562fbd6afSShri Abhyankar for (i = n - 1; i >= 0; i--) { 31363c0119dfSShri Abhyankar v = aa + adiag[i + 1] + 1; 31373c0119dfSShri Abhyankar vi = aj + adiag[i + 1] + 1; 3138813a1f2bSShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 3139813a1f2bSShri Abhyankar sum = x[i]; 3140813a1f2bSShri Abhyankar PetscSparseDenseMinusDot(sum, x, v, vi, nz); 31413c0119dfSShri Abhyankar x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3142813a1f2bSShri Abhyankar } 3143813a1f2bSShri Abhyankar 31449566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 31459566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 31469566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 31473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3148813a1f2bSShri Abhyankar } 3149813a1f2bSShri Abhyankar 3150d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx) 3151d71ae5a4SJacob Faibussowitsch { 3152f268cba8SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3153f268cba8SShri Abhyankar IS iscol = a->col, isrow = a->row; 3154f268cba8SShri Abhyankar PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz; 3155f268cba8SShri Abhyankar const PetscInt *rout, *cout, *r, *c; 3156f268cba8SShri Abhyankar PetscScalar *x, *tmp, sum; 3157f268cba8SShri Abhyankar const PetscScalar *b; 3158f268cba8SShri Abhyankar const MatScalar *aa = a->a, *v; 3159f268cba8SShri Abhyankar 3160f268cba8SShri Abhyankar PetscFunctionBegin; 31613ba16761SJacob Faibussowitsch if (!n) PetscFunctionReturn(PETSC_SUCCESS); 3162f268cba8SShri Abhyankar 31639566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b)); 31649566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(xx, &x)); 3165f268cba8SShri Abhyankar tmp = a->solve_work; 3166f268cba8SShri Abhyankar 31679371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout)); 31689371c9d4SSatish Balay r = rout; 31699371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout)); 31709371c9d4SSatish Balay c = cout; 3171f268cba8SShri Abhyankar 3172f268cba8SShri Abhyankar /* forward solve the lower triangular */ 3173f268cba8SShri Abhyankar tmp[0] = b[r[0]]; 3174f268cba8SShri Abhyankar v = aa; 3175f268cba8SShri Abhyankar vi = aj; 3176f268cba8SShri Abhyankar for (i = 1; i < n; i++) { 3177f268cba8SShri Abhyankar nz = ai[i + 1] - ai[i]; 3178f268cba8SShri Abhyankar sum = b[r[i]]; 3179f268cba8SShri Abhyankar PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3180f268cba8SShri Abhyankar tmp[i] = sum; 31819371c9d4SSatish Balay v += nz; 31829371c9d4SSatish Balay vi += nz; 3183f268cba8SShri Abhyankar } 3184f268cba8SShri Abhyankar 3185f268cba8SShri Abhyankar /* backward solve the upper triangular */ 3186f268cba8SShri Abhyankar for (i = n - 1; i >= 0; i--) { 31873c0119dfSShri Abhyankar v = aa + adiag[i + 1] + 1; 31883c0119dfSShri Abhyankar vi = aj + adiag[i + 1] + 1; 3189f268cba8SShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1; 3190f268cba8SShri Abhyankar sum = tmp[i]; 3191f268cba8SShri Abhyankar PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3192f268cba8SShri Abhyankar x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 3193f268cba8SShri Abhyankar } 3194f268cba8SShri Abhyankar 31959566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout)); 31969566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout)); 31979566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b)); 31989566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(xx, &x)); 31999566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 32003ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3201f268cba8SShri Abhyankar } 3202f268cba8SShri Abhyankar 3203ff6a9541SJacob Faibussowitsch #if 0 3204ff6a9541SJacob Faibussowitsch // unused 3205fe97e370SBarry Smith /* 32066aad120cSJose E. Roman This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3207fe97e370SBarry Smith */ 3208ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact) 3209d71ae5a4SJacob Faibussowitsch { 32101d098868SHong Zhang Mat B = *fact; 3211599ef60dSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 3212599ef60dSHong Zhang IS isicol; 32131d098868SHong Zhang const PetscInt *r, *ic; 32141d098868SHong Zhang PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag; 3215dc3a2fd3SHong Zhang PetscInt *bi, *bj, *bdiag, *bdiag_rev; 3216f61a948aSLisandro Dalcin PetscInt row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au; 32171d098868SHong Zhang PetscInt nlnk, *lnk; 32181d098868SHong Zhang PetscBT lnkbt; 3219ace3abfcSBarry Smith PetscBool row_identity, icol_identity; 32208aee2decSHong Zhang MatScalar *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp; 32211d098868SHong Zhang const PetscInt *ics; 32221d098868SHong Zhang PetscInt j, nz, *pj, *bjtmp, k, ncut, *jtmp; 3223a2ea699eSBarry Smith PetscReal dt = info->dt, shift = info->shiftamount; 3224f61a948aSLisandro Dalcin PetscInt dtcount = (PetscInt)info->dtcount, nnz_max; 3225ace3abfcSBarry Smith PetscBool missing; 3226599ef60dSHong Zhang 3227599ef60dSHong Zhang PetscFunctionBegin; 322813bcc0bdSJacob Faibussowitsch if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005; 3229f61a948aSLisandro Dalcin if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax); 3230f61a948aSLisandro Dalcin 32312ef1f0ffSBarry Smith /* symbolic factorization, can be reused */ 32329566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 323328b400f6SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 32341d098868SHong Zhang adiag = a->diag; 3235599ef60dSHong Zhang 32369566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 3237599ef60dSHong Zhang 3238599ef60dSHong Zhang /* bdiag is location of diagonal in factor */ 32399566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); /* becomes b->diag */ 32409566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */ 3241599ef60dSHong Zhang 32421d098868SHong Zhang /* allocate row pointers bi */ 32439566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * n + 2, &bi)); 32441d098868SHong Zhang 32451d098868SHong Zhang /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3246393d3a68SHong Zhang if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */ 32471d098868SHong Zhang nnz_max = ai[n] + 2 * n * dtcount + 2; 32488fc3a347SHong Zhang 32499566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz_max + 1, &bj)); 32509566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nnz_max + 1, &ba)); 3251599ef60dSHong Zhang 3252599ef60dSHong Zhang /* put together the new matrix */ 32539566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 3254f284f12aSHong Zhang b = (Mat_SeqAIJ *)B->data; 32552205254eSKarl Rupp 3256599ef60dSHong Zhang b->free_a = PETSC_TRUE; 3257599ef60dSHong Zhang b->free_ij = PETSC_TRUE; 3258599ef60dSHong Zhang b->singlemalloc = PETSC_FALSE; 32592205254eSKarl Rupp 3260599ef60dSHong Zhang b->a = ba; 3261599ef60dSHong Zhang b->j = bj; 3262599ef60dSHong Zhang b->i = bi; 3263599ef60dSHong Zhang b->diag = bdiag; 3264f4259b30SLisandro Dalcin b->ilen = NULL; 3265f4259b30SLisandro Dalcin b->imax = NULL; 3266599ef60dSHong Zhang b->row = isrow; 3267599ef60dSHong Zhang b->col = iscol; 32689566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 32699566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 3270599ef60dSHong Zhang b->icol = isicol; 3271599ef60dSHong Zhang 32729566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 32731d098868SHong Zhang b->maxnz = nnz_max; 3274599ef60dSHong Zhang 3275d5f3da31SBarry Smith B->factortype = MAT_FACTOR_ILUDT; 3276f284f12aSHong Zhang B->info.factor_mallocs = 0; 3277f284f12aSHong Zhang B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]); 32782ef1f0ffSBarry Smith /* end of symbolic factorization */ 3279599ef60dSHong Zhang 32809566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 32819566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 3282599ef60dSHong Zhang ics = ic; 3283599ef60dSHong Zhang 3284599ef60dSHong Zhang /* linked list for storing column indices of the active row */ 3285599ef60dSHong Zhang nlnk = n + 1; 32869566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 32871d098868SHong Zhang 32881d098868SHong Zhang /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 32899566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n, &im, n, &jtmp)); 32901d098868SHong Zhang /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 32919566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp)); 32929566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(rtmp, n)); 3293599ef60dSHong Zhang 3294599ef60dSHong Zhang bi[0] = 0; 32958fc3a347SHong Zhang bdiag[0] = nnz_max - 1; /* location of diag[0] in factor B */ 3296dc3a2fd3SHong Zhang bdiag_rev[n] = bdiag[0]; 32978fc3a347SHong Zhang bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */ 3298599ef60dSHong Zhang for (i = 0; i < n; i++) { 3299599ef60dSHong Zhang /* copy initial fill into linked list */ 33008369b28dSHong Zhang nzi = ai[r[i] + 1] - ai[r[i]]; 330194bad497SJacob Faibussowitsch PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 33028369b28dSHong Zhang nzi_al = adiag[r[i]] - ai[r[i]]; 33038369b28dSHong Zhang nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1; 3304599ef60dSHong Zhang ajtmp = aj + ai[r[i]]; 33059566063dSJacob Faibussowitsch PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 3306599ef60dSHong Zhang 3307599ef60dSHong Zhang /* load in initial (unfactored row) */ 33081d098868SHong Zhang aatmp = a->a + ai[r[i]]; 3309ad540459SPierre Jolivet for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++; 3310599ef60dSHong Zhang 3311599ef60dSHong Zhang /* add pivot rows into linked list */ 3312599ef60dSHong Zhang row = lnk[n]; 3313599ef60dSHong Zhang while (row < i) { 33141d098868SHong Zhang nzi_bl = bi[row + 1] - bi[row] + 1; 33151d098868SHong Zhang bjtmp = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */ 33169566063dSJacob Faibussowitsch PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im)); 3317599ef60dSHong Zhang nzi += nlnk; 3318599ef60dSHong Zhang row = lnk[row]; 3319599ef60dSHong Zhang } 3320599ef60dSHong Zhang 33218369b28dSHong Zhang /* copy data from lnk into jtmp, then initialize lnk */ 33229566063dSJacob Faibussowitsch PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt)); 3323599ef60dSHong Zhang 3324599ef60dSHong Zhang /* numerical factorization */ 33258369b28dSHong Zhang bjtmp = jtmp; 3326599ef60dSHong Zhang row = *bjtmp++; /* 1st pivot row */ 3327599ef60dSHong Zhang while (row < i) { 3328599ef60dSHong Zhang pc = rtmp + row; 33293c2a7987SHong Zhang pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 33303c2a7987SHong Zhang multiplier = (*pc) * (*pv); 3331599ef60dSHong Zhang *pc = multiplier; 33323c2a7987SHong Zhang if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 33331d098868SHong Zhang pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 33341d098868SHong Zhang pv = ba + bdiag[row + 1] + 1; 33351d098868SHong Zhang nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 33361d098868SHong Zhang for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 33379566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 3338599ef60dSHong Zhang } 3339599ef60dSHong Zhang row = *bjtmp++; 3340599ef60dSHong Zhang } 3341599ef60dSHong Zhang 33428369b28dSHong Zhang /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 33438aee2decSHong Zhang diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 33449371c9d4SSatish Balay nzi_bl = 0; 33459371c9d4SSatish Balay j = 0; 33468369b28dSHong Zhang while (jtmp[j] < i) { /* Note: jtmp is sorted */ 33479371c9d4SSatish Balay vtmp[j] = rtmp[jtmp[j]]; 33489371c9d4SSatish Balay rtmp[jtmp[j]] = 0.0; 33499371c9d4SSatish Balay nzi_bl++; 33509371c9d4SSatish Balay j++; 33518369b28dSHong Zhang } 33528369b28dSHong Zhang nzi_bu = nzi - nzi_bl - 1; 33538369b28dSHong Zhang while (j < nzi) { 33549371c9d4SSatish Balay vtmp[j] = rtmp[jtmp[j]]; 33559371c9d4SSatish Balay rtmp[jtmp[j]] = 0.0; 33568369b28dSHong Zhang j++; 33578369b28dSHong Zhang } 33588369b28dSHong Zhang 3359599ef60dSHong Zhang bjtmp = bj + bi[i]; 3360599ef60dSHong Zhang batmp = ba + bi[i]; 33618369b28dSHong Zhang /* apply level dropping rule to L part */ 33628369b28dSHong Zhang ncut = nzi_al + dtcount; 33638369b28dSHong Zhang if (ncut < nzi_bl) { 33649566063dSJacob Faibussowitsch PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp)); 33659566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp)); 3366599ef60dSHong Zhang } else { 33678369b28dSHong Zhang ncut = nzi_bl; 33688369b28dSHong Zhang } 33698369b28dSHong Zhang for (j = 0; j < ncut; j++) { 33708369b28dSHong Zhang bjtmp[j] = jtmp[j]; 33718369b28dSHong Zhang batmp[j] = vtmp[j]; 33728369b28dSHong Zhang } 33731d098868SHong Zhang bi[i + 1] = bi[i] + ncut; 33748369b28dSHong Zhang nzi = ncut + 1; 33758369b28dSHong Zhang 33768369b28dSHong Zhang /* apply level dropping rule to U part */ 33778369b28dSHong Zhang ncut = nzi_au + dtcount; 33788369b28dSHong Zhang if (ncut < nzi_bu) { 33799566063dSJacob Faibussowitsch PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1)); 33809566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1)); 33818369b28dSHong Zhang } else { 33828369b28dSHong Zhang ncut = nzi_bu; 33838369b28dSHong Zhang } 33848369b28dSHong Zhang nzi += ncut; 33851d098868SHong Zhang 33861d098868SHong Zhang /* mark bdiagonal */ 33871d098868SHong Zhang bdiag[i + 1] = bdiag[i] - (ncut + 1); 3388dc3a2fd3SHong Zhang bdiag_rev[n - i - 1] = bdiag[i + 1]; 33898fc3a347SHong Zhang bi[2 * n - i] = bi[2 * n - i + 1] - (ncut + 1); 33901d098868SHong Zhang bjtmp = bj + bdiag[i]; 33911d098868SHong Zhang batmp = ba + bdiag[i]; 33921d098868SHong Zhang *bjtmp = i; 33938aee2decSHong Zhang *batmp = diag_tmp; /* rtmp[i]; */ 3394ad540459SPierre Jolivet if (*batmp == 0.0) *batmp = dt + shift; 3395a5b23f4aSJose E. Roman *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */ 33961d098868SHong Zhang 33971d098868SHong Zhang bjtmp = bj + bdiag[i + 1] + 1; 33981d098868SHong Zhang batmp = ba + bdiag[i + 1] + 1; 33998369b28dSHong Zhang for (k = 0; k < ncut; k++) { 34001d098868SHong Zhang bjtmp[k] = jtmp[nzi_bl + 1 + k]; 34011d098868SHong Zhang batmp[k] = vtmp[nzi_bl + 1 + k]; 3402599ef60dSHong Zhang } 3403599ef60dSHong Zhang 3404599ef60dSHong Zhang im[i] = nzi; /* used by PetscLLAddSortedLU() */ 34058369b28dSHong Zhang } /* for (i=0; i<n; i++) */ 340663a3b9bcSJacob Faibussowitsch PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]); 3407599ef60dSHong Zhang 34089566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 34099566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 3410599ef60dSHong Zhang 34119566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 34129566063dSJacob Faibussowitsch PetscCall(PetscFree2(im, jtmp)); 34139566063dSJacob Faibussowitsch PetscCall(PetscFree2(rtmp, vtmp)); 34149566063dSJacob Faibussowitsch PetscCall(PetscFree(bdiag_rev)); 3415599ef60dSHong Zhang 34169566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(B->cmap->n)); 34171d098868SHong Zhang b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3418599ef60dSHong Zhang 34199566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 34209566063dSJacob Faibussowitsch PetscCall(ISIdentity(isicol, &icol_identity)); 3421599ef60dSHong Zhang if (row_identity && icol_identity) { 342235233d99SShri Abhyankar B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3423599ef60dSHong Zhang } else { 342435233d99SShri Abhyankar B->ops->solve = MatSolve_SeqAIJ; 3425599ef60dSHong Zhang } 3426599ef60dSHong Zhang 3427f4259b30SLisandro Dalcin B->ops->solveadd = NULL; 3428f4259b30SLisandro Dalcin B->ops->solvetranspose = NULL; 3429f4259b30SLisandro Dalcin B->ops->solvetransposeadd = NULL; 3430f4259b30SLisandro Dalcin B->ops->matsolve = NULL; 3431599ef60dSHong Zhang B->assembled = PETSC_TRUE; 3432599ef60dSHong Zhang B->preallocated = PETSC_TRUE; 34333ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3434599ef60dSHong Zhang } 3435599ef60dSHong Zhang 3436da81f932SPierre Jolivet /* a wrapper of MatILUDTFactor_SeqAIJ() */ 3437fe97e370SBarry Smith /* 34386aad120cSJose E. Roman This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3439fe97e370SBarry Smith */ 3440fe97e370SBarry Smith 3441ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) 3442d71ae5a4SJacob Faibussowitsch { 34433c2a7987SHong Zhang PetscFunctionBegin; 34449566063dSJacob Faibussowitsch PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact)); 34453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 34463c2a7987SHong Zhang } 34473c2a7987SHong Zhang 34483c2a7987SHong Zhang /* 34493c2a7987SHong Zhang same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 34503c2a7987SHong Zhang - intend to replace existing MatLUFactorNumeric_SeqAIJ() 34513c2a7987SHong Zhang */ 3452fe97e370SBarry Smith /* 34536aad120cSJose E. Roman This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3454fe97e370SBarry Smith */ 3455fe97e370SBarry Smith 3456ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info) 3457d71ae5a4SJacob Faibussowitsch { 34583c2a7987SHong Zhang Mat C = fact; 34593c2a7987SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 34603c2a7987SHong Zhang IS isrow = b->row, isicol = b->icol; 34613c2a7987SHong Zhang const PetscInt *r, *ic, *ics; 3462b78a477dSHong Zhang PetscInt i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 3463b78a477dSHong Zhang PetscInt *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj; 34643c2a7987SHong Zhang MatScalar *rtmp, *pc, multiplier, *v, *pv, *aa = a->a; 3465182b8fbaSHong Zhang PetscReal dt = info->dt, shift = info->shiftamount; 3466ace3abfcSBarry Smith PetscBool row_identity, col_identity; 34673c2a7987SHong Zhang 34683c2a7987SHong Zhang PetscFunctionBegin; 34699566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 34709566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 34719566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp)); 34723c2a7987SHong Zhang ics = ic; 34733c2a7987SHong Zhang 34743c2a7987SHong Zhang for (i = 0; i < n; i++) { 3475b78a477dSHong Zhang /* initialize rtmp array */ 3476b78a477dSHong Zhang nzl = bi[i + 1] - bi[i]; /* num of nozeros in L(i,:) */ 34773c2a7987SHong Zhang bjtmp = bj + bi[i]; 3478b78a477dSHong Zhang for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0; 3479b78a477dSHong Zhang rtmp[i] = 0.0; 3480b78a477dSHong Zhang nzu = bdiag[i] - bdiag[i + 1]; /* num of nozeros in U(i,:) */ 3481b78a477dSHong Zhang bjtmp = bj + bdiag[i + 1] + 1; 3482b78a477dSHong Zhang for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0; 34833c2a7987SHong Zhang 3484b78a477dSHong Zhang /* load in initial unfactored row of A */ 34853c2a7987SHong Zhang nz = ai[r[i] + 1] - ai[r[i]]; 34863c2a7987SHong Zhang ajtmp = aj + ai[r[i]]; 34873c2a7987SHong Zhang v = aa + ai[r[i]]; 3488ad540459SPierre Jolivet for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j]; 34893c2a7987SHong Zhang 3490b78a477dSHong Zhang /* numerical factorization */ 3491b78a477dSHong Zhang bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3492b78a477dSHong Zhang nzl = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */ 3493b78a477dSHong Zhang k = 0; 3494b78a477dSHong Zhang while (k < nzl) { 34953c2a7987SHong Zhang row = *bjtmp++; 34963c2a7987SHong Zhang pc = rtmp + row; 3497b78a477dSHong Zhang pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3498b78a477dSHong Zhang multiplier = (*pc) * (*pv); 34993c2a7987SHong Zhang *pc = multiplier; 3500b78a477dSHong Zhang if (PetscAbsScalar(multiplier) > dt) { 3501b78a477dSHong Zhang pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 3502b78a477dSHong Zhang pv = b->a + bdiag[row + 1] + 1; 3503b78a477dSHong Zhang nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3504b78a477dSHong Zhang for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 35059566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1 + 2.0 * nz)); 35063c2a7987SHong Zhang } 3507b78a477dSHong Zhang k++; 35083c2a7987SHong Zhang } 35093c2a7987SHong Zhang 3510b78a477dSHong Zhang /* finished row so stick it into b->a */ 3511b78a477dSHong Zhang /* L-part */ 3512b78a477dSHong Zhang pv = b->a + bi[i]; 3513b78a477dSHong Zhang pj = bj + bi[i]; 3514b78a477dSHong Zhang nzl = bi[i + 1] - bi[i]; 3515ad540459SPierre Jolivet for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]]; 3516b78a477dSHong Zhang 3517a5b23f4aSJose E. Roman /* diagonal: invert diagonal entries for simpler triangular solves */ 3518b78a477dSHong Zhang if (rtmp[i] == 0.0) rtmp[i] = dt + shift; 3519b78a477dSHong Zhang b->a[bdiag[i]] = 1.0 / rtmp[i]; 3520b78a477dSHong Zhang 3521b78a477dSHong Zhang /* U-part */ 3522b78a477dSHong Zhang pv = b->a + bdiag[i + 1] + 1; 3523b78a477dSHong Zhang pj = bj + bdiag[i + 1] + 1; 3524b78a477dSHong Zhang nzu = bdiag[i] - bdiag[i + 1] - 1; 3525ad540459SPierre Jolivet for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]]; 3526b78a477dSHong Zhang } 3527b78a477dSHong Zhang 35289566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp)); 35299566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 35309566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 3531b78a477dSHong Zhang 35329566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 35339566063dSJacob Faibussowitsch PetscCall(ISIdentity(isicol, &col_identity)); 35343c2a7987SHong Zhang if (row_identity && col_identity) { 353535233d99SShri Abhyankar C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 35363c2a7987SHong Zhang } else { 353735233d99SShri Abhyankar C->ops->solve = MatSolve_SeqAIJ; 35383c2a7987SHong Zhang } 3539f4259b30SLisandro Dalcin C->ops->solveadd = NULL; 3540f4259b30SLisandro Dalcin C->ops->solvetranspose = NULL; 3541f4259b30SLisandro Dalcin C->ops->solvetransposeadd = NULL; 3542f4259b30SLisandro Dalcin C->ops->matsolve = NULL; 35433c2a7987SHong Zhang C->assembled = PETSC_TRUE; 35443c2a7987SHong Zhang C->preallocated = PETSC_TRUE; 35452205254eSKarl Rupp 35469566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->cmap->n)); 35473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 35483c2a7987SHong Zhang } 3549ff6a9541SJacob Faibussowitsch #endif 3550