xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 48a46eb9bd028bec07ec0f396b1a3abb43f14558)
1b377110cSBarry Smith 
2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>
3c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h>
4c6db04a5SJed Brown #include <petscbt.h>
5c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
63b2fbd54SBarry Smith 
78fa24674SBarry Smith /*
88fa24674SBarry Smith       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
9312e372aSBarry Smith 
10312e372aSBarry Smith       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
118fa24674SBarry Smith */
129371c9d4SSatish Balay PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol) {
138fa24674SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)mat->data;
14889a8dedSBarry Smith   PetscInt           i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
158fa24674SBarry Smith   const PetscInt    *ai = a->i, *aj = a->j;
168fa24674SBarry Smith   const PetscScalar *aa = a->a;
17ace3abfcSBarry Smith   PetscBool         *done;
18889a8dedSBarry Smith   PetscReal          best, past = 0, future;
193a40ed3dSBarry Smith 
208fa24674SBarry Smith   PetscFunctionBegin;
218fa24674SBarry Smith   /* pick initial row */
228fa24674SBarry Smith   best = -1;
238fa24674SBarry Smith   for (i = 0; i < n; i++) {
2475567043SBarry Smith     future = 0.0;
258fa24674SBarry Smith     for (j = ai[i]; j < ai[i + 1]; j++) {
26db4deed7SKarl Rupp       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
27db4deed7SKarl Rupp       else past = PetscAbsScalar(aa[j]);
288fa24674SBarry Smith     }
298fa24674SBarry Smith     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
308fa24674SBarry Smith     if (past / future > best) {
318fa24674SBarry Smith       best    = past / future;
328fa24674SBarry Smith       current = i;
338fa24674SBarry Smith     }
348fa24674SBarry Smith   }
358fa24674SBarry Smith 
369566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &done));
379566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(done, n));
389566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &order));
398fa24674SBarry Smith   order[0] = current;
408fa24674SBarry Smith   for (i = 0; i < n - 1; i++) {
418fa24674SBarry Smith     done[current] = PETSC_TRUE;
428fa24674SBarry Smith     best          = -1;
438fa24674SBarry Smith     /* loop over all neighbors of current pivot */
448fa24674SBarry Smith     for (j = ai[current]; j < ai[current + 1]; j++) {
458fa24674SBarry Smith       jj = aj[j];
468fa24674SBarry Smith       if (done[jj]) continue;
478fa24674SBarry Smith       /* loop over columns of potential next row computing weights for below and above diagonal */
488fa24674SBarry Smith       past = future = 0.0;
498fa24674SBarry Smith       for (k = ai[jj]; k < ai[jj + 1]; k++) {
508fa24674SBarry Smith         kk = aj[k];
518fa24674SBarry Smith         if (done[kk]) past += PetscAbsScalar(aa[k]);
528fa24674SBarry Smith         else if (kk != jj) future += PetscAbsScalar(aa[k]);
538fa24674SBarry Smith       }
548fa24674SBarry Smith       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
558fa24674SBarry Smith       if (past / future > best) {
568fa24674SBarry Smith         best       = past / future;
578fa24674SBarry Smith         newcurrent = jj;
588fa24674SBarry Smith       }
598fa24674SBarry Smith     }
608fa24674SBarry Smith     if (best == -1) { /* no neighbors to select from so select best of all that remain */
618fa24674SBarry Smith       best = -1;
628fa24674SBarry Smith       for (k = 0; k < n; k++) {
638fa24674SBarry Smith         if (done[k]) continue;
6475567043SBarry Smith         future = 0.0;
6575567043SBarry Smith         past   = 0.0;
668fa24674SBarry Smith         for (j = ai[k]; j < ai[k + 1]; j++) {
678fa24674SBarry Smith           kk = aj[j];
688fa24674SBarry Smith           if (done[kk]) past += PetscAbsScalar(aa[j]);
698fa24674SBarry Smith           else if (kk != k) future += PetscAbsScalar(aa[j]);
708fa24674SBarry Smith         }
718fa24674SBarry Smith         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
728fa24674SBarry Smith         if (past / future > best) {
738fa24674SBarry Smith           best       = past / future;
748fa24674SBarry Smith           newcurrent = k;
758fa24674SBarry Smith         }
768fa24674SBarry Smith       }
778fa24674SBarry Smith     }
7808401ef6SPierre Jolivet     PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current");
798fa24674SBarry Smith     current      = newcurrent;
808fa24674SBarry Smith     order[i + 1] = current;
818fa24674SBarry Smith   }
829566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow));
838fa24674SBarry Smith   *icol = *irow;
849566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)*irow));
859566063dSJacob Faibussowitsch   PetscCall(PetscFree(done));
869566063dSJacob Faibussowitsch   PetscCall(PetscFree(order));
87d64ed03dSBarry Smith   PetscFunctionReturn(0);
883b2fbd54SBarry Smith }
893b2fbd54SBarry Smith 
909371c9d4SSatish Balay static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type) {
914ac6704cSBarry Smith   PetscFunctionBegin;
924ac6704cSBarry Smith   *type = MATSOLVERPETSC;
934ac6704cSBarry Smith   PetscFunctionReturn(0);
944ac6704cSBarry Smith }
954ac6704cSBarry Smith 
969371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B) {
97d0f46423SBarry Smith   PetscInt n = A->rmap->n;
98b24902e0SBarry Smith 
99b24902e0SBarry Smith   PetscFunctionBegin;
100891bceaeSHong Zhang #if defined(PETSC_USE_COMPLEX)
101b94d7dedSBarry Smith   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || (ftype != MAT_FACTOR_CHOLESKY && ftype != MAT_FACTOR_ICC), PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY or ICC Factor is not supported");
102891bceaeSHong Zhang #endif
1039566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
1049566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*B, n, n, n, n));
105599ef60dSHong Zhang   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
1069566063dSJacob Faibussowitsch     PetscCall(MatSetType(*B, MATSEQAIJ));
1072205254eSKarl Rupp 
10835233d99SShri Abhyankar     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
10935233d99SShri Abhyankar     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
1102205254eSKarl Rupp 
1119566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizesFromMats(*B, A, A));
1129566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
1139566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
1149566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
115b24902e0SBarry Smith   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
1169566063dSJacob Faibussowitsch     PetscCall(MatSetType(*B, MATSEQSBAIJ));
1179566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
1182205254eSKarl Rupp 
11935233d99SShri Abhyankar     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
12035233d99SShri Abhyankar     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
1219566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
1229566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
123e32f2f54SBarry Smith   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
124d5f3da31SBarry Smith   (*B)->factortype = ftype;
12500c67f3bSHong Zhang 
1269566063dSJacob Faibussowitsch   PetscCall(PetscFree((*B)->solvertype));
1279566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
128f73b0415SBarry Smith   (*B)->canuseordering = PETSC_TRUE;
1299566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
130b24902e0SBarry Smith   PetscFunctionReturn(0);
131b24902e0SBarry Smith }
132b24902e0SBarry Smith 
1339371c9d4SSatish Balay PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) {
134416022c9SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
135289bc588SBarry Smith   IS                 isicol;
1365d0c19d7SBarry Smith   const PetscInt    *r, *ic;
1375d0c19d7SBarry Smith   PetscInt           i, n = A->rmap->n, *ai = a->i, *aj = a->j;
1381393bc97SHong Zhang   PetscInt          *bi, *bj, *ajtmp;
1391393bc97SHong Zhang   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
1409e046878SBarry Smith   PetscReal          f;
1414875ba38SHong Zhang   PetscInt           nlnk, *lnk, k, **bi_ptr;
1420298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1431393bc97SHong Zhang   PetscBT            lnkbt;
144ece542f9SHong Zhang   PetscBool          missing;
145289bc588SBarry Smith 
1463a40ed3dSBarry Smith   PetscFunctionBegin;
14708401ef6SPierre Jolivet   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
1489566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
14928b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
150ece542f9SHong Zhang 
1519566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1529566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
1539566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
154289bc588SBarry Smith 
155289bc588SBarry Smith   /* get new row pointers */
1569566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bi));
1571393bc97SHong Zhang   bi[0] = 0;
1581393bc97SHong Zhang 
1591393bc97SHong Zhang   /* bdiag is location of diagonal in factor */
1609566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));
1611393bc97SHong Zhang   bdiag[0] = 0;
1621393bc97SHong Zhang 
1631393bc97SHong Zhang   /* linked list for storing column indices of the active row */
1641393bc97SHong Zhang   nlnk = n + 1;
1659566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
1661393bc97SHong Zhang 
1679566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
1681393bc97SHong Zhang 
1691393bc97SHong Zhang   /* initial FreeSpace size is f*(ai[n]+1) */
170d3d32019SBarry Smith   f = info->fill;
171aa91b3e7SRichard Tran Mills   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
1729566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1731393bc97SHong Zhang   current_space = free_space;
174289bc588SBarry Smith 
175289bc588SBarry Smith   for (i = 0; i < n; i++) {
1761393bc97SHong Zhang     /* copy previous fill into linked list */
177289bc588SBarry Smith     nzi   = 0;
1781393bc97SHong Zhang     nnz   = ai[r[i] + 1] - ai[r[i]];
1791393bc97SHong Zhang     ajtmp = aj + ai[r[i]];
1809566063dSJacob Faibussowitsch     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
1811393bc97SHong Zhang     nzi += nlnk;
1821393bc97SHong Zhang 
1831393bc97SHong Zhang     /* add pivot rows into linked list */
1841393bc97SHong Zhang     row = lnk[n];
1851393bc97SHong Zhang     while (row < i) {
186d1170560SHong Zhang       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
187d1170560SHong Zhang       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
1889566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
1891393bc97SHong Zhang       nzi += nlnk;
1901393bc97SHong Zhang       row = lnk[row];
191289bc588SBarry Smith     }
1921393bc97SHong Zhang     bi[i + 1] = bi[i] + nzi;
1931393bc97SHong Zhang     im[i]     = nzi;
1941393bc97SHong Zhang 
1951393bc97SHong Zhang     /* mark bdiag */
1961393bc97SHong Zhang     nzbd = 0;
1971393bc97SHong Zhang     nnz  = nzi;
1981393bc97SHong Zhang     k    = lnk[n];
1991393bc97SHong Zhang     while (nnz-- && k < i) {
2001393bc97SHong Zhang       nzbd++;
2011393bc97SHong Zhang       k = lnk[k];
2021393bc97SHong Zhang     }
2031393bc97SHong Zhang     bdiag[i] = bi[i] + nzbd;
2041393bc97SHong Zhang 
2051393bc97SHong Zhang     /* if free space is not available, make more free space */
2061393bc97SHong Zhang     if (current_space->local_remaining < nzi) {
207f91af8c7SBarry Smith       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
2089566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
2091393bc97SHong Zhang       reallocs++;
2101393bc97SHong Zhang     }
2111393bc97SHong Zhang 
2121393bc97SHong Zhang     /* copy data into free space, then initialize lnk */
2139566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
2142205254eSKarl Rupp 
2151393bc97SHong Zhang     bi_ptr[i] = current_space->array;
2161393bc97SHong Zhang     current_space->array += nzi;
2171393bc97SHong Zhang     current_space->local_used += nzi;
2181393bc97SHong Zhang     current_space->local_remaining -= nzi;
219289bc588SBarry Smith   }
2206cf91177SBarry Smith #if defined(PETSC_USE_INFO)
221464e8d44SSatish Balay   if (ai[n] != 0) {
2221393bc97SHong Zhang     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2239566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
2249566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2259566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
2269566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "for best performance.\n"));
22705bf559cSSatish Balay   } else {
2289566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
22905bf559cSSatish Balay   }
23063ba0a88SBarry Smith #endif
2312fb3ed76SBarry Smith 
2329566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
2339566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
2341987afe7SBarry Smith 
2351393bc97SHong Zhang   /* destroy list of free space and other temporary array(s) */
2369566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
2379566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
2389566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
2399566063dSJacob Faibussowitsch   PetscCall(PetscFree2(bi_ptr, im));
240289bc588SBarry Smith 
241289bc588SBarry Smith   /* put together the new matrix */
2429566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
2439566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)isicol));
244719d5645SBarry Smith   b = (Mat_SeqAIJ *)(B)->data;
2452205254eSKarl Rupp 
246e6b907acSBarry Smith   b->free_a       = PETSC_TRUE;
247e6b907acSBarry Smith   b->free_ij      = PETSC_TRUE;
2487c922b88SBarry Smith   b->singlemalloc = PETSC_FALSE;
2492205254eSKarl Rupp 
2509566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[n] + 1, &b->a));
2511393bc97SHong Zhang   b->j    = bj;
2521393bc97SHong Zhang   b->i    = bi;
2531393bc97SHong Zhang   b->diag = bdiag;
254f4259b30SLisandro Dalcin   b->ilen = NULL;
255f4259b30SLisandro Dalcin   b->imax = NULL;
256416022c9SBarry Smith   b->row  = isrow;
257416022c9SBarry Smith   b->col  = iscol;
2589566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
2599566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
26082bf6240SBarry Smith   b->icol = isicol;
2619566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
2621393bc97SHong Zhang 
2631393bc97SHong Zhang   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
2649566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectMemory((PetscObject)B, (bi[n] - n) * (sizeof(PetscInt) + sizeof(PetscScalar))));
2651393bc97SHong Zhang   b->maxnz = b->nz = bi[n];
2667fd98bd6SLois Curfman McInnes 
267d5f3da31SBarry Smith   (B)->factortype            = MAT_FACTOR_LU;
268719d5645SBarry Smith   (B)->info.factor_mallocs   = reallocs;
269719d5645SBarry Smith   (B)->info.fill_ratio_given = f;
270703deb49SSatish Balay 
2719f7d0b68SHong Zhang   if (ai[n]) {
272719d5645SBarry Smith     (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
273eea2913aSSatish Balay   } else {
274719d5645SBarry Smith     (B)->info.fill_ratio_needed = 0.0;
275eea2913aSSatish Balay   }
276ad04f41aSHong Zhang   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
2779371c9d4SSatish Balay   if (a->inode.size) { (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; }
2783a40ed3dSBarry Smith   PetscFunctionReturn(0);
279289bc588SBarry Smith }
2801393bc97SHong Zhang 
2819371c9d4SSatish Balay PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) {
282ce3d78c0SShri Abhyankar   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
283ce3d78c0SShri Abhyankar   IS                 isicol;
2848758e1faSBarry Smith   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
2858758e1faSBarry Smith   PetscInt           i, n = A->rmap->n;
2868758e1faSBarry Smith   PetscInt          *bi, *bj;
287ce3d78c0SShri Abhyankar   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
288ce3d78c0SShri Abhyankar   PetscReal          f;
289ce3d78c0SShri Abhyankar   PetscInt           nlnk, *lnk, k, **bi_ptr;
2900298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
291ce3d78c0SShri Abhyankar   PetscBT            lnkbt;
292ece542f9SHong Zhang   PetscBool          missing;
293ce3d78c0SShri Abhyankar 
294ce3d78c0SShri Abhyankar   PetscFunctionBegin;
29508401ef6SPierre Jolivet   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
2969566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
29728b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
298ece542f9SHong Zhang 
2999566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
3009566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
3019566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
302ce3d78c0SShri Abhyankar 
3031bfa06eaSJed Brown   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
3049566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bi));
3059566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));
306a6df321fSShri Abhyankar   bi[0] = bdiag[0] = 0;
3079b48462bSShri Abhyankar 
3089b48462bSShri Abhyankar   /* linked list for storing column indices of the active row */
3099b48462bSShri Abhyankar   nlnk = n + 1;
3109566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
3119b48462bSShri Abhyankar 
3129566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
3139b48462bSShri Abhyankar 
3149b48462bSShri Abhyankar   /* initial FreeSpace size is f*(ai[n]+1) */
3159b48462bSShri Abhyankar   f = info->fill;
316aa91b3e7SRichard Tran Mills   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
3179566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
3189b48462bSShri Abhyankar   current_space = free_space;
3199b48462bSShri Abhyankar 
3209b48462bSShri Abhyankar   for (i = 0; i < n; i++) {
3219b48462bSShri Abhyankar     /* copy previous fill into linked list */
3229b48462bSShri Abhyankar     nzi   = 0;
3239b48462bSShri Abhyankar     nnz   = ai[r[i] + 1] - ai[r[i]];
3249b48462bSShri Abhyankar     ajtmp = aj + ai[r[i]];
3259566063dSJacob Faibussowitsch     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
3269b48462bSShri Abhyankar     nzi += nlnk;
3279b48462bSShri Abhyankar 
3289b48462bSShri Abhyankar     /* add pivot rows into linked list */
3299b48462bSShri Abhyankar     row = lnk[n];
3309b48462bSShri Abhyankar     while (row < i) {
3319b48462bSShri Abhyankar       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
3329b48462bSShri Abhyankar       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
3339566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
3349b48462bSShri Abhyankar       nzi += nlnk;
3359b48462bSShri Abhyankar       row = lnk[row];
3369b48462bSShri Abhyankar     }
3379b48462bSShri Abhyankar     bi[i + 1] = bi[i] + nzi;
3389b48462bSShri Abhyankar     im[i]     = nzi;
3399b48462bSShri Abhyankar 
3409b48462bSShri Abhyankar     /* mark bdiag */
3419b48462bSShri Abhyankar     nzbd = 0;
3429b48462bSShri Abhyankar     nnz  = nzi;
3439b48462bSShri Abhyankar     k    = lnk[n];
3449b48462bSShri Abhyankar     while (nnz-- && k < i) {
3459b48462bSShri Abhyankar       nzbd++;
3469b48462bSShri Abhyankar       k = lnk[k];
3479b48462bSShri Abhyankar     }
3489b48462bSShri Abhyankar     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
3499b48462bSShri Abhyankar 
3509b48462bSShri Abhyankar     /* if free space is not available, make more free space */
3519b48462bSShri Abhyankar     if (current_space->local_remaining < nzi) {
3522f18eb33SBarry Smith       /* estimated additional space needed */
3531df4278eSBarry Smith       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
3549566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
3559b48462bSShri Abhyankar       reallocs++;
3569b48462bSShri Abhyankar     }
3579b48462bSShri Abhyankar 
3589b48462bSShri Abhyankar     /* copy data into free space, then initialize lnk */
3599566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
3602205254eSKarl Rupp 
3619b48462bSShri Abhyankar     bi_ptr[i] = current_space->array;
3629b48462bSShri Abhyankar     current_space->array += nzi;
3639b48462bSShri Abhyankar     current_space->local_used += nzi;
3649b48462bSShri Abhyankar     current_space->local_remaining -= nzi;
3659b48462bSShri Abhyankar   }
3669b48462bSShri Abhyankar 
3679566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
3689566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
3699b48462bSShri Abhyankar 
3709263d837SHong Zhang   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
3719566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
3729566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
3739566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
3749566063dSJacob Faibussowitsch   PetscCall(PetscFree2(bi_ptr, im));
3759b48462bSShri Abhyankar 
3769b48462bSShri Abhyankar   /* put together the new matrix */
3779566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3789566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)isicol));
3799b48462bSShri Abhyankar   b = (Mat_SeqAIJ *)(B)->data;
3802205254eSKarl Rupp 
3819b48462bSShri Abhyankar   b->free_a       = PETSC_TRUE;
3829b48462bSShri Abhyankar   b->free_ij      = PETSC_TRUE;
3839b48462bSShri Abhyankar   b->singlemalloc = PETSC_FALSE;
3842205254eSKarl Rupp 
3859566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
3862205254eSKarl Rupp 
3879b48462bSShri Abhyankar   b->j    = bj;
3889b48462bSShri Abhyankar   b->i    = bi;
3899b48462bSShri Abhyankar   b->diag = bdiag;
390f4259b30SLisandro Dalcin   b->ilen = NULL;
391f4259b30SLisandro Dalcin   b->imax = NULL;
3929b48462bSShri Abhyankar   b->row  = isrow;
3939b48462bSShri Abhyankar   b->col  = iscol;
3949566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
3959566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
3969b48462bSShri Abhyankar   b->icol = isicol;
3979566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
3989b48462bSShri Abhyankar 
3999b48462bSShri Abhyankar   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
4009566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectMemory((PetscObject)B, (bdiag[0] + 1) * (sizeof(PetscInt) + sizeof(PetscScalar))));
4019b48462bSShri Abhyankar   b->maxnz = b->nz = bdiag[0] + 1;
4022205254eSKarl Rupp 
403d5f3da31SBarry Smith   B->factortype            = MAT_FACTOR_LU;
404f284f12aSHong Zhang   B->info.factor_mallocs   = reallocs;
405f284f12aSHong Zhang   B->info.fill_ratio_given = f;
4069b48462bSShri Abhyankar 
4079b48462bSShri Abhyankar   if (ai[n]) {
408f284f12aSHong Zhang     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
4099b48462bSShri Abhyankar   } else {
410f284f12aSHong Zhang     B->info.fill_ratio_needed = 0.0;
4119b48462bSShri Abhyankar   }
4129263d837SHong Zhang #if defined(PETSC_USE_INFO)
4139263d837SHong Zhang   if (ai[n] != 0) {
4149263d837SHong Zhang     PetscReal af = B->info.fill_ratio_needed;
4159566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
4169566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
4179566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
4189566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "for best performance.\n"));
4199263d837SHong Zhang   } else {
4209566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
4219263d837SHong Zhang   }
4229263d837SHong Zhang #endif
42335233d99SShri Abhyankar   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
4249371c9d4SSatish Balay   if (a->inode.size) { B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; }
4259566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
4269b48462bSShri Abhyankar   PetscFunctionReturn(0);
4279b48462bSShri Abhyankar }
4289b48462bSShri Abhyankar 
4295b5bc046SBarry Smith /*
4305b5bc046SBarry Smith     Trouble in factorization, should we dump the original matrix?
4315b5bc046SBarry Smith */
4329371c9d4SSatish Balay PetscErrorCode MatFactorDumpMatrix(Mat A) {
433ace3abfcSBarry Smith   PetscBool flg = PETSC_FALSE;
4345b5bc046SBarry Smith 
4355b5bc046SBarry Smith   PetscFunctionBegin;
4369566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
4375b5bc046SBarry Smith   if (flg) {
4385b5bc046SBarry Smith     PetscViewer viewer;
4395b5bc046SBarry Smith     char        filename[PETSC_MAX_PATH_LEN];
4405b5bc046SBarry Smith 
4419566063dSJacob Faibussowitsch     PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
4429566063dSJacob Faibussowitsch     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
4439566063dSJacob Faibussowitsch     PetscCall(MatView(A, viewer));
4449566063dSJacob Faibussowitsch     PetscCall(PetscViewerDestroy(&viewer));
4455b5bc046SBarry Smith   }
4465b5bc046SBarry Smith   PetscFunctionReturn(0);
4475b5bc046SBarry Smith }
4485b5bc046SBarry Smith 
4499371c9d4SSatish Balay PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) {
450c9c72f8fSHong Zhang   Mat              C = B;
451c9c72f8fSHong Zhang   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
452c9c72f8fSHong Zhang   IS               isrow = b->row, isicol = b->icol;
453c9c72f8fSHong Zhang   const PetscInt  *r, *ic, *ics;
454bbd65245SShri Abhyankar   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
455bbd65245SShri Abhyankar   PetscInt         i, j, k, nz, nzL, row, *pj;
456bbd65245SShri Abhyankar   const PetscInt  *ajtmp, *bjtmp;
457bbd65245SShri Abhyankar   MatScalar       *rtmp, *pc, multiplier, *pv;
458bbd65245SShri Abhyankar   const MatScalar *aa = a->a, *v;
459ace3abfcSBarry Smith   PetscBool        row_identity, col_identity;
460d90ac83dSHong Zhang   FactorShiftCtx   sctx;
4614f81c4b7SBarry Smith   const PetscInt  *ddiag;
462d4ad06f7SHong Zhang   PetscReal        rs;
463d4ad06f7SHong Zhang   MatScalar        d;
464d4ad06f7SHong Zhang 
465c9c72f8fSHong Zhang   PetscFunctionBegin;
466d6e5152cSHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
4679566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
468d4ad06f7SHong Zhang 
469f4db908eSBarry Smith   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
470d4ad06f7SHong Zhang     ddiag          = a->diag;
471d4ad06f7SHong Zhang     sctx.shift_top = info->zeropivot;
472d4ad06f7SHong Zhang     for (i = 0; i < n; i++) {
473d4ad06f7SHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
474d4ad06f7SHong Zhang       d  = (aa)[ddiag[i]];
475d4ad06f7SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
476d4ad06f7SHong Zhang       v  = aa + ai[i];
477d4ad06f7SHong Zhang       nz = ai[i + 1] - ai[i];
4782205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
479d4ad06f7SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
480d4ad06f7SHong Zhang     }
481d4ad06f7SHong Zhang     sctx.shift_top *= 1.1;
482d4ad06f7SHong Zhang     sctx.nshift_max = 5;
483d4ad06f7SHong Zhang     sctx.shift_lo   = 0.;
484d4ad06f7SHong Zhang     sctx.shift_hi   = 1.;
485d4ad06f7SHong Zhang   }
486d4ad06f7SHong Zhang 
4879566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
4889566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
4899566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &rtmp));
490c9c72f8fSHong Zhang   ics = ic;
491c9c72f8fSHong Zhang 
492d4ad06f7SHong Zhang   do {
49307b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
494c9c72f8fSHong Zhang     for (i = 0; i < n; i++) {
495c9c72f8fSHong Zhang       /* zero rtmp */
496c9c72f8fSHong Zhang       /* L part */
497c9c72f8fSHong Zhang       nz    = bi[i + 1] - bi[i];
498c9c72f8fSHong Zhang       bjtmp = bj + bi[i];
499c9c72f8fSHong Zhang       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
500c9c72f8fSHong Zhang 
501c9c72f8fSHong Zhang       /* U part */
50262fbd6afSShri Abhyankar       nz    = bdiag[i] - bdiag[i + 1];
50362fbd6afSShri Abhyankar       bjtmp = bj + bdiag[i + 1] + 1;
504813a1f2bSShri Abhyankar       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
505813a1f2bSShri Abhyankar 
506813a1f2bSShri Abhyankar       /* load in initial (unfactored row) */
507813a1f2bSShri Abhyankar       nz    = ai[r[i] + 1] - ai[r[i]];
508813a1f2bSShri Abhyankar       ajtmp = aj + ai[r[i]];
509813a1f2bSShri Abhyankar       v     = aa + ai[r[i]];
5109371c9d4SSatish Balay       for (j = 0; j < nz; j++) { rtmp[ics[ajtmp[j]]] = v[j]; }
511813a1f2bSShri Abhyankar       /* ZeropivotApply() */
51298a93161SHong Zhang       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
513813a1f2bSShri Abhyankar 
514813a1f2bSShri Abhyankar       /* elimination */
515813a1f2bSShri Abhyankar       bjtmp = bj + bi[i];
516813a1f2bSShri Abhyankar       row   = *bjtmp++;
517813a1f2bSShri Abhyankar       nzL   = bi[i + 1] - bi[i];
518f268cba8SShri Abhyankar       for (k = 0; k < nzL; k++) {
519813a1f2bSShri Abhyankar         pc = rtmp + row;
520813a1f2bSShri Abhyankar         if (*pc != 0.0) {
521813a1f2bSShri Abhyankar           pv         = b->a + bdiag[row];
522813a1f2bSShri Abhyankar           multiplier = *pc * (*pv);
523813a1f2bSShri Abhyankar           *pc        = multiplier;
5242205254eSKarl Rupp 
52562fbd6afSShri Abhyankar           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
52662fbd6afSShri Abhyankar           pv = b->a + bdiag[row + 1] + 1;
52762fbd6afSShri Abhyankar           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
5282205254eSKarl Rupp 
529813a1f2bSShri Abhyankar           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
5309566063dSJacob Faibussowitsch           PetscCall(PetscLogFlops(1 + 2.0 * nz));
531813a1f2bSShri Abhyankar         }
532f268cba8SShri Abhyankar         row = *bjtmp++;
533813a1f2bSShri Abhyankar       }
534813a1f2bSShri Abhyankar 
535813a1f2bSShri Abhyankar       /* finished row so stick it into b->a */
536813a1f2bSShri Abhyankar       rs = 0.0;
537813a1f2bSShri Abhyankar       /* L part */
538813a1f2bSShri Abhyankar       pv = b->a + bi[i];
539813a1f2bSShri Abhyankar       pj = b->j + bi[i];
540813a1f2bSShri Abhyankar       nz = bi[i + 1] - bi[i];
541813a1f2bSShri Abhyankar       for (j = 0; j < nz; j++) {
5429371c9d4SSatish Balay         pv[j] = rtmp[pj[j]];
5439371c9d4SSatish Balay         rs += PetscAbsScalar(pv[j]);
544813a1f2bSShri Abhyankar       }
545813a1f2bSShri Abhyankar 
546813a1f2bSShri Abhyankar       /* U part */
54762fbd6afSShri Abhyankar       pv = b->a + bdiag[i + 1] + 1;
54862fbd6afSShri Abhyankar       pj = b->j + bdiag[i + 1] + 1;
54962fbd6afSShri Abhyankar       nz = bdiag[i] - bdiag[i + 1] - 1;
550813a1f2bSShri Abhyankar       for (j = 0; j < nz; j++) {
5519371c9d4SSatish Balay         pv[j] = rtmp[pj[j]];
5529371c9d4SSatish Balay         rs += PetscAbsScalar(pv[j]);
553813a1f2bSShri Abhyankar       }
554813a1f2bSShri Abhyankar 
555813a1f2bSShri Abhyankar       sctx.rs = rs;
556813a1f2bSShri Abhyankar       sctx.pv = rtmp[i];
5579566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
55807b50cabSHong Zhang       if (sctx.newshift) break; /* break for-loop */
55907b50cabSHong Zhang       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
560813a1f2bSShri Abhyankar 
561a5b23f4aSJose E. Roman       /* Mark diagonal and invert diagonal for simpler triangular solves */
562813a1f2bSShri Abhyankar       pv  = b->a + bdiag[i];
563813a1f2bSShri Abhyankar       *pv = 1.0 / rtmp[i];
564813a1f2bSShri Abhyankar 
565813a1f2bSShri Abhyankar     } /* endof for (i=0; i<n; i++) { */
566813a1f2bSShri Abhyankar 
5678ff23777SHong Zhang     /* MatPivotRefine() */
56807b50cabSHong Zhang     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
569813a1f2bSShri Abhyankar       /*
570813a1f2bSShri Abhyankar        * if no shift in this attempt & shifting & started shifting & can refine,
571813a1f2bSShri Abhyankar        * then try lower shift
572813a1f2bSShri Abhyankar        */
573813a1f2bSShri Abhyankar       sctx.shift_hi       = sctx.shift_fraction;
574813a1f2bSShri Abhyankar       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
575813a1f2bSShri Abhyankar       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
57607b50cabSHong Zhang       sctx.newshift       = PETSC_TRUE;
577813a1f2bSShri Abhyankar       sctx.nshift++;
578813a1f2bSShri Abhyankar     }
57907b50cabSHong Zhang   } while (sctx.newshift);
580813a1f2bSShri Abhyankar 
5819566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
5829566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
5839566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
584d3ac4fa3SBarry Smith 
5859566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
5869566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isicol, &col_identity));
587abb87a52SBarry Smith   if (b->inode.size) {
588abb87a52SBarry Smith     C->ops->solve = MatSolve_SeqAIJ_Inode;
589abb87a52SBarry Smith   } else if (row_identity && col_identity) {
59035233d99SShri Abhyankar     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
591813a1f2bSShri Abhyankar   } else {
59235233d99SShri Abhyankar     C->ops->solve = MatSolve_SeqAIJ;
593813a1f2bSShri Abhyankar   }
59435233d99SShri Abhyankar   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
59535233d99SShri Abhyankar   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
59635233d99SShri Abhyankar   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
59735233d99SShri Abhyankar   C->ops->matsolve          = MatMatSolve_SeqAIJ;
598813a1f2bSShri Abhyankar   C->assembled              = PETSC_TRUE;
599813a1f2bSShri Abhyankar   C->preallocated           = PETSC_TRUE;
6002205254eSKarl Rupp 
6019566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->cmap->n));
602813a1f2bSShri Abhyankar 
60314d2772eSHong Zhang   /* MatShiftView(A,info,&sctx) */
604813a1f2bSShri Abhyankar   if (sctx.nshift) {
605f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
6069566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
607f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
6089566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
609f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
6109566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
611813a1f2bSShri Abhyankar     }
612813a1f2bSShri Abhyankar   }
613813a1f2bSShri Abhyankar   PetscFunctionReturn(0);
614813a1f2bSShri Abhyankar }
615813a1f2bSShri Abhyankar 
6169371c9d4SSatish Balay PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) {
617719d5645SBarry Smith   Mat              C = B;
618416022c9SBarry Smith   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
61982bf6240SBarry Smith   IS               isrow = b->row, isicol = b->icol;
6205d0c19d7SBarry Smith   const PetscInt  *r, *ic, *ics;
621d278edc7SBarry Smith   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
622d278edc7SBarry Smith   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
623d278edc7SBarry Smith   const PetscInt  *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
624d278edc7SBarry Smith   MatScalar       *pv, *rtmp, *pc, multiplier, d;
625d278edc7SBarry Smith   const MatScalar *v, *aa = a->a;
62633f9893dSHong Zhang   PetscReal        rs = 0.0;
627d90ac83dSHong Zhang   FactorShiftCtx   sctx;
6284f81c4b7SBarry Smith   const PetscInt  *ddiag;
629ace3abfcSBarry Smith   PetscBool        row_identity, col_identity;
630289bc588SBarry Smith 
6313a40ed3dSBarry Smith   PetscFunctionBegin;
632504a3fadSHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
6339566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
634ada7143aSSatish Balay 
635f4db908eSBarry Smith   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
63642898933SHong Zhang     ddiag          = a->diag;
6379f7d0b68SHong Zhang     sctx.shift_top = info->zeropivot;
6386cc28720Svictorle     for (i = 0; i < n; i++) {
6399f95998fSHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
6409f7d0b68SHong Zhang       d  = (aa)[ddiag[i]];
6411a890ab1SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
6429f7d0b68SHong Zhang       v  = aa + ai[i];
6439f7d0b68SHong Zhang       nz = ai[i + 1] - ai[i];
6442205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
6451a890ab1SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
6466cc28720Svictorle     }
647118f5789SBarry Smith     sctx.shift_top *= 1.1;
648d2276718SHong Zhang     sctx.nshift_max = 5;
649d2276718SHong Zhang     sctx.shift_lo   = 0.;
650d2276718SHong Zhang     sctx.shift_hi   = 1.;
651a0a356efSHong Zhang   }
652a0a356efSHong Zhang 
6539566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
6549566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
6559566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &rtmp));
656504a3fadSHong Zhang   ics = ic;
657504a3fadSHong Zhang 
658085256b3SBarry Smith   do {
65907b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
660289bc588SBarry Smith     for (i = 0; i < n; i++) {
661b3bf805bSHong Zhang       nz    = bi[i + 1] - bi[i];
662b3bf805bSHong Zhang       bjtmp = bj + bi[i];
6639f7d0b68SHong Zhang       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
664289bc588SBarry Smith 
665289bc588SBarry Smith       /* load in initial (unfactored row) */
6669f7d0b68SHong Zhang       nz    = ai[r[i] + 1] - ai[r[i]];
6679f7d0b68SHong Zhang       ajtmp = aj + ai[r[i]];
6689f7d0b68SHong Zhang       v     = aa + ai[r[i]];
6699371c9d4SSatish Balay       for (j = 0; j < nz; j++) { rtmp[ics[ajtmp[j]]] = v[j]; }
670d2276718SHong Zhang       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
671289bc588SBarry Smith 
672b3bf805bSHong Zhang       row = *bjtmp++;
673f2582111SSatish Balay       while (row < i) {
6748c37ef55SBarry Smith         pc = rtmp + row;
6758c37ef55SBarry Smith         if (*pc != 0.0) {
676010a20caSHong Zhang           pv         = b->a + diag_offset[row];
677010a20caSHong Zhang           pj         = b->j + diag_offset[row] + 1;
67835aab85fSBarry Smith           multiplier = *pc / *pv++;
6798c37ef55SBarry Smith           *pc        = multiplier;
680b3bf805bSHong Zhang           nz         = bi[row + 1] - diag_offset[row] - 1;
6819f7d0b68SHong Zhang           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
6829566063dSJacob Faibussowitsch           PetscCall(PetscLogFlops(1 + 2.0 * nz));
683289bc588SBarry Smith         }
684b3bf805bSHong Zhang         row = *bjtmp++;
685289bc588SBarry Smith       }
686416022c9SBarry Smith       /* finished row so stick it into b->a */
687b3bf805bSHong Zhang       pv   = b->a + bi[i];
688b3bf805bSHong Zhang       pj   = b->j + bi[i];
689b3bf805bSHong Zhang       nz   = bi[i + 1] - bi[i];
690b3bf805bSHong Zhang       diag = diag_offset[i] - bi[i];
691e57ff13aSBarry Smith       rs   = 0.0;
692d3d32019SBarry Smith       for (j = 0; j < nz; j++) {
6939f7d0b68SHong Zhang         pv[j] = rtmp[pj[j]];
6949f7d0b68SHong Zhang         rs += PetscAbsScalar(pv[j]);
695d3d32019SBarry Smith       }
696e57ff13aSBarry Smith       rs -= PetscAbsScalar(pv[diag]);
697d2276718SHong Zhang 
698d2276718SHong Zhang       sctx.rs = rs;
699d2276718SHong Zhang       sctx.pv = pv[diag];
7009566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
70107b50cabSHong Zhang       if (sctx.newshift) break;
702504a3fadSHong Zhang       pv[diag] = sctx.pv;
70335aab85fSBarry Smith     }
704d2276718SHong Zhang 
70507b50cabSHong Zhang     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
7066cc28720Svictorle       /*
7076c037d1bSvictorle        * if no shift in this attempt & shifting & started shifting & can refine,
7086cc28720Svictorle        * then try lower shift
7096cc28720Svictorle        */
7100481f469SBarry Smith       sctx.shift_hi       = sctx.shift_fraction;
7110481f469SBarry Smith       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
7120481f469SBarry Smith       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
71307b50cabSHong Zhang       sctx.newshift       = PETSC_TRUE;
714d2276718SHong Zhang       sctx.nshift++;
7156cc28720Svictorle     }
71607b50cabSHong Zhang   } while (sctx.newshift);
7170f11f9aeSSatish Balay 
718a5b23f4aSJose E. Roman   /* invert diagonal entries for simpler triangular solves */
7199371c9d4SSatish Balay   for (i = 0; i < n; i++) { b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]]; }
7209566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
7219566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
7229566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
723d3ac4fa3SBarry Smith 
7249566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
7259566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isicol, &col_identity));
7268d8c7c43SBarry Smith   if (row_identity && col_identity) {
727ad04f41aSHong Zhang     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
7288d8c7c43SBarry Smith   } else {
729ad04f41aSHong Zhang     C->ops->solve = MatSolve_SeqAIJ_inplace;
730db4efbfdSBarry Smith   }
731ad04f41aSHong Zhang   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
732ad04f41aSHong Zhang   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
733ad04f41aSHong Zhang   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
734ad04f41aSHong Zhang   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
7352205254eSKarl Rupp 
736c456f294SBarry Smith   C->assembled    = PETSC_TRUE;
7375c9eb25fSBarry Smith   C->preallocated = PETSC_TRUE;
7382205254eSKarl Rupp 
7399566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->cmap->n));
740d2276718SHong Zhang   if (sctx.nshift) {
741f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
7429566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
743f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
7449566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
7456cc28720Svictorle     }
7460697b6caSHong Zhang   }
747d3ac4fa3SBarry Smith   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
748d3ac4fa3SBarry Smith   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
7492205254eSKarl Rupp 
7509566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCheckInode(C));
7513a40ed3dSBarry Smith   PetscFunctionReturn(0);
752289bc588SBarry Smith }
7530889b5dcSvictorle 
754137fb511SHong Zhang /*
755137fb511SHong Zhang    This routine implements inplace ILU(0) with row or/and column permutations.
756137fb511SHong Zhang    Input:
757137fb511SHong Zhang      A - original matrix
758137fb511SHong Zhang    Output;
759137fb511SHong Zhang      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
760137fb511SHong Zhang          a->j (col index) is permuted by the inverse of colperm, then sorted
761137fb511SHong Zhang          a->a reordered accordingly with a->j
762137fb511SHong Zhang          a->diag (ptr to diagonal elements) is updated.
763137fb511SHong Zhang */
7649371c9d4SSatish Balay PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info) {
765b051339dSHong Zhang   Mat_SeqAIJ      *a     = (Mat_SeqAIJ *)A->data;
766b051339dSHong Zhang   IS               isrow = a->row, isicol = a->icol;
7675d0c19d7SBarry Smith   const PetscInt  *r, *ic, *ics;
7685d0c19d7SBarry Smith   PetscInt         i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
7695d0c19d7SBarry Smith   PetscInt        *ajtmp, nz, row;
770b051339dSHong Zhang   PetscInt        *diag = a->diag, nbdiag, *pj;
771a77337e4SBarry Smith   PetscScalar     *rtmp, *pc, multiplier, d;
772504a3fadSHong Zhang   MatScalar       *pv, *v;
773137fb511SHong Zhang   PetscReal        rs;
774d90ac83dSHong Zhang   FactorShiftCtx   sctx;
775504a3fadSHong Zhang   const MatScalar *aa = a->a, *vtmp;
776137fb511SHong Zhang 
777137fb511SHong Zhang   PetscFunctionBegin;
77808401ef6SPierre Jolivet   PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
779504a3fadSHong Zhang 
780504a3fadSHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
7819566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
782504a3fadSHong Zhang 
783504a3fadSHong Zhang   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
784504a3fadSHong Zhang     const PetscInt *ddiag = a->diag;
785504a3fadSHong Zhang     sctx.shift_top        = info->zeropivot;
786504a3fadSHong Zhang     for (i = 0; i < n; i++) {
787504a3fadSHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
788504a3fadSHong Zhang       d    = (aa)[ddiag[i]];
789504a3fadSHong Zhang       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
790504a3fadSHong Zhang       vtmp = aa + ai[i];
791504a3fadSHong Zhang       nz   = ai[i + 1] - ai[i];
7922205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
793504a3fadSHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
794504a3fadSHong Zhang     }
795504a3fadSHong Zhang     sctx.shift_top *= 1.1;
796504a3fadSHong Zhang     sctx.nshift_max = 5;
797504a3fadSHong Zhang     sctx.shift_lo   = 0.;
798504a3fadSHong Zhang     sctx.shift_hi   = 1.;
799504a3fadSHong Zhang   }
800504a3fadSHong Zhang 
8019566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
8029566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
8039566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &rtmp));
8049566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(rtmp, n + 1));
805b051339dSHong Zhang   ics = ic;
806137fb511SHong Zhang 
807504a3fadSHong Zhang #if defined(MV)
80875567043SBarry Smith   sctx.shift_top      = 0.;
809e60cf9a0SBarry Smith   sctx.nshift_max     = 0;
81075567043SBarry Smith   sctx.shift_lo       = 0.;
81175567043SBarry Smith   sctx.shift_hi       = 0.;
81275567043SBarry Smith   sctx.shift_fraction = 0.;
813137fb511SHong Zhang 
814f4db908eSBarry Smith   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
81575567043SBarry Smith     sctx.shift_top = 0.;
816137fb511SHong Zhang     for (i = 0; i < n; i++) {
817137fb511SHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
818b051339dSHong Zhang       d  = (a->a)[diag[i]];
819137fb511SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
820b051339dSHong Zhang       v  = a->a + ai[i];
821b051339dSHong Zhang       nz = ai[i + 1] - ai[i];
8222205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
823137fb511SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
824137fb511SHong Zhang     }
825137fb511SHong Zhang     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
826137fb511SHong Zhang     sctx.shift_top *= 1.1;
827137fb511SHong Zhang     sctx.nshift_max = 5;
828137fb511SHong Zhang     sctx.shift_lo   = 0.;
829137fb511SHong Zhang     sctx.shift_hi   = 1.;
830137fb511SHong Zhang   }
831137fb511SHong Zhang 
83275567043SBarry Smith   sctx.shift_amount = 0.;
833137fb511SHong Zhang   sctx.nshift       = 0;
834504a3fadSHong Zhang #endif
835504a3fadSHong Zhang 
836137fb511SHong Zhang   do {
83707b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
838137fb511SHong Zhang     for (i = 0; i < n; i++) {
839b051339dSHong Zhang       /* load in initial unfactored row */
840b051339dSHong Zhang       nz    = ai[r[i] + 1] - ai[r[i]];
841b051339dSHong Zhang       ajtmp = aj + ai[r[i]];
842b051339dSHong Zhang       v     = a->a + ai[r[i]];
843137fb511SHong Zhang       /* sort permuted ajtmp and values v accordingly */
844b051339dSHong Zhang       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
8459566063dSJacob Faibussowitsch       PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
846137fb511SHong Zhang 
847b051339dSHong Zhang       diag[r[i]] = ai[r[i]];
848137fb511SHong Zhang       for (j = 0; j < nz; j++) {
849137fb511SHong Zhang         rtmp[ajtmp[j]] = v[j];
850b051339dSHong Zhang         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
851137fb511SHong Zhang       }
852137fb511SHong Zhang       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
853137fb511SHong Zhang 
854b051339dSHong Zhang       row = *ajtmp++;
855137fb511SHong Zhang       while (row < i) {
856137fb511SHong Zhang         pc = rtmp + row;
857137fb511SHong Zhang         if (*pc != 0.0) {
858b051339dSHong Zhang           pv = a->a + diag[r[row]];
859b051339dSHong Zhang           pj = aj + diag[r[row]] + 1;
860137fb511SHong Zhang 
861137fb511SHong Zhang           multiplier = *pc / *pv++;
862137fb511SHong Zhang           *pc        = multiplier;
863b051339dSHong Zhang           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
864b051339dSHong Zhang           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
8659566063dSJacob Faibussowitsch           PetscCall(PetscLogFlops(1 + 2.0 * nz));
866137fb511SHong Zhang         }
867b051339dSHong Zhang         row = *ajtmp++;
868137fb511SHong Zhang       }
869b051339dSHong Zhang       /* finished row so overwrite it onto a->a */
870b051339dSHong Zhang       pv     = a->a + ai[r[i]];
871b051339dSHong Zhang       pj     = aj + ai[r[i]];
872b051339dSHong Zhang       nz     = ai[r[i] + 1] - ai[r[i]];
873b051339dSHong Zhang       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
874137fb511SHong Zhang 
875137fb511SHong Zhang       rs = 0.0;
876137fb511SHong Zhang       for (j = 0; j < nz; j++) {
877b051339dSHong Zhang         pv[j] = rtmp[pj[j]];
878b051339dSHong Zhang         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
879137fb511SHong Zhang       }
880137fb511SHong Zhang 
881137fb511SHong Zhang       sctx.rs = rs;
882b051339dSHong Zhang       sctx.pv = pv[nbdiag];
8839566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
88407b50cabSHong Zhang       if (sctx.newshift) break;
885504a3fadSHong Zhang       pv[nbdiag] = sctx.pv;
886137fb511SHong Zhang     }
887137fb511SHong Zhang 
88807b50cabSHong Zhang     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
889137fb511SHong Zhang       /*
890137fb511SHong Zhang        * if no shift in this attempt & shifting & started shifting & can refine,
891137fb511SHong Zhang        * then try lower shift
892137fb511SHong Zhang        */
8930481f469SBarry Smith       sctx.shift_hi       = sctx.shift_fraction;
8940481f469SBarry Smith       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
8950481f469SBarry Smith       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
89607b50cabSHong Zhang       sctx.newshift       = PETSC_TRUE;
897137fb511SHong Zhang       sctx.nshift++;
898137fb511SHong Zhang     }
89907b50cabSHong Zhang   } while (sctx.newshift);
900137fb511SHong Zhang 
901a5b23f4aSJose E. Roman   /* invert diagonal entries for simpler triangular solves */
9029371c9d4SSatish Balay   for (i = 0; i < n; i++) { a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]]; }
903137fb511SHong Zhang 
9049566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
9059566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
9069566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
9072205254eSKarl Rupp 
908b051339dSHong Zhang   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
909ad04f41aSHong Zhang   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
910ad04f41aSHong Zhang   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
911ad04f41aSHong Zhang   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
9122205254eSKarl Rupp 
913b051339dSHong Zhang   A->assembled    = PETSC_TRUE;
9145c9eb25fSBarry Smith   A->preallocated = PETSC_TRUE;
9152205254eSKarl Rupp 
9169566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(A->cmap->n));
917137fb511SHong Zhang   if (sctx.nshift) {
918f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
9199566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
920f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
9219566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
922137fb511SHong Zhang     }
923137fb511SHong Zhang   }
924137fb511SHong Zhang   PetscFunctionReturn(0);
925137fb511SHong Zhang }
926137fb511SHong Zhang 
9270a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */
9289371c9d4SSatish Balay PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info) {
929416022c9SBarry Smith   Mat C;
930416022c9SBarry Smith 
9313a40ed3dSBarry Smith   PetscFunctionBegin;
9329566063dSJacob Faibussowitsch   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
9339566063dSJacob Faibussowitsch   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
9349566063dSJacob Faibussowitsch   PetscCall(MatLUFactorNumeric(C, A, info));
9352205254eSKarl Rupp 
936db4efbfdSBarry Smith   A->ops->solve          = C->ops->solve;
937db4efbfdSBarry Smith   A->ops->solvetranspose = C->ops->solvetranspose;
9382205254eSKarl Rupp 
9399566063dSJacob Faibussowitsch   PetscCall(MatHeaderMerge(A, &C));
9409566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)((Mat_SeqAIJ *)(A->data))->icol));
9413a40ed3dSBarry Smith   PetscFunctionReturn(0);
942da3a660dSBarry Smith }
9430a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */
9441d098868SHong Zhang 
9459371c9d4SSatish Balay PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) {
946416022c9SBarry Smith   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
947416022c9SBarry Smith   IS                 iscol = a->col, isrow = a->row;
9485d0c19d7SBarry Smith   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
9495d0c19d7SBarry Smith   PetscInt           nz;
9505d0c19d7SBarry Smith   const PetscInt    *rout, *cout, *r, *c;
951dd6ea824SBarry Smith   PetscScalar       *x, *tmp, *tmps, sum;
952d9fead3dSBarry Smith   const PetscScalar *b;
953dd6ea824SBarry Smith   const MatScalar   *aa = a->a, *v;
9548c37ef55SBarry Smith 
9553a40ed3dSBarry Smith   PetscFunctionBegin;
9563a40ed3dSBarry Smith   if (!n) PetscFunctionReturn(0);
95791bf9adeSBarry Smith 
9589566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
9599566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
960416022c9SBarry Smith   tmp = a->solve_work;
9618c37ef55SBarry Smith 
9629371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
9639371c9d4SSatish Balay   r = rout;
9649371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
9659371c9d4SSatish Balay   c = cout + (n - 1);
9668c37ef55SBarry Smith 
9678c37ef55SBarry Smith   /* forward solve the lower triangular */
9688c37ef55SBarry Smith   tmp[0] = b[*r++];
969010a20caSHong Zhang   tmps   = tmp;
9708c37ef55SBarry Smith   for (i = 1; i < n; i++) {
971010a20caSHong Zhang     v   = aa + ai[i];
972010a20caSHong Zhang     vi  = aj + ai[i];
97353e7425aSBarry Smith     nz  = a->diag[i] - ai[i];
97453e7425aSBarry Smith     sum = b[*r++];
975003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
9768c37ef55SBarry Smith     tmp[i] = sum;
9778c37ef55SBarry Smith   }
9788c37ef55SBarry Smith 
9798c37ef55SBarry Smith   /* backward solve the upper triangular */
9808c37ef55SBarry Smith   for (i = n - 1; i >= 0; i--) {
981010a20caSHong Zhang     v   = aa + a->diag[i] + 1;
982010a20caSHong Zhang     vi  = aj + a->diag[i] + 1;
983416022c9SBarry Smith     nz  = ai[i + 1] - a->diag[i] - 1;
9848c37ef55SBarry Smith     sum = tmp[i];
985003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
986010a20caSHong Zhang     x[*c--] = tmp[i] = sum * aa[a->diag[i]];
9878c37ef55SBarry Smith   }
9888c37ef55SBarry Smith 
9899566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
9909566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
9919566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
9929566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
9939566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
9943a40ed3dSBarry Smith   PetscFunctionReturn(0);
9958c37ef55SBarry Smith }
996026e39d0SSatish Balay 
9979371c9d4SSatish Balay PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X) {
9982bebee5dSHong Zhang   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
9992bebee5dSHong Zhang   IS                 iscol = a->col, isrow = a->row;
10005d0c19d7SBarry Smith   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1001910cf402Sprj-   PetscInt           nz, neq, ldb, ldx;
10025d0c19d7SBarry Smith   const PetscInt    *rout, *cout, *r, *c;
1003910cf402Sprj-   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
1004910cf402Sprj-   const PetscScalar *b, *aa  = a->a, *v;
1005910cf402Sprj-   PetscBool          isdense;
10062bebee5dSHong Zhang 
10072bebee5dSHong Zhang   PetscFunctionBegin;
10082bebee5dSHong Zhang   if (!n) PetscFunctionReturn(0);
10099566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
101028b400f6SJacob Faibussowitsch   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1011f9fb9879SHong Zhang   if (X != B) {
10129566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
101328b400f6SJacob Faibussowitsch     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1014f9fb9879SHong Zhang   }
10159566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(B, &b));
10169566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(B, &ldb));
10179566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(X, &x));
10189566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(X, &ldx));
10199371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
10209371c9d4SSatish Balay   r = rout;
10219371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
10229371c9d4SSatish Balay   c = cout;
1023a36b22ccSSatish Balay   for (neq = 0; neq < B->cmap->n; neq++) {
10242bebee5dSHong Zhang     /* forward solve the lower triangular */
10252bebee5dSHong Zhang     tmp[0] = b[r[0]];
10262bebee5dSHong Zhang     tmps   = tmp;
10272bebee5dSHong Zhang     for (i = 1; i < n; i++) {
10282bebee5dSHong Zhang       v   = aa + ai[i];
10292bebee5dSHong Zhang       vi  = aj + ai[i];
10302bebee5dSHong Zhang       nz  = a->diag[i] - ai[i];
10312bebee5dSHong Zhang       sum = b[r[i]];
1032003131ecSBarry Smith       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
10332bebee5dSHong Zhang       tmp[i] = sum;
10342bebee5dSHong Zhang     }
10352bebee5dSHong Zhang     /* backward solve the upper triangular */
10362bebee5dSHong Zhang     for (i = n - 1; i >= 0; i--) {
10372bebee5dSHong Zhang       v   = aa + a->diag[i] + 1;
10382bebee5dSHong Zhang       vi  = aj + a->diag[i] + 1;
10392bebee5dSHong Zhang       nz  = ai[i + 1] - a->diag[i] - 1;
10402bebee5dSHong Zhang       sum = tmp[i];
1041003131ecSBarry Smith       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
10422bebee5dSHong Zhang       x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
10432bebee5dSHong Zhang     }
1044910cf402Sprj-     b += ldb;
1045910cf402Sprj-     x += ldx;
10462bebee5dSHong Zhang   }
10479566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
10489566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
10499566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(B, &b));
10509566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(X, &x));
10519566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
10522bebee5dSHong Zhang   PetscFunctionReturn(0);
10532bebee5dSHong Zhang }
10542bebee5dSHong Zhang 
10559371c9d4SSatish Balay PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X) {
10569bd0847aSShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
10579bd0847aSShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
10589bd0847aSShri Abhyankar   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1059910cf402Sprj-   PetscInt           nz, neq, ldb, ldx;
10609bd0847aSShri Abhyankar   const PetscInt    *rout, *cout, *r, *c;
1061910cf402Sprj-   PetscScalar       *x, *tmp = a->solve_work, sum;
1062910cf402Sprj-   const PetscScalar *b, *aa  = a->a, *v;
1063910cf402Sprj-   PetscBool          isdense;
10649bd0847aSShri Abhyankar 
10659bd0847aSShri Abhyankar   PetscFunctionBegin;
10669bd0847aSShri Abhyankar   if (!n) PetscFunctionReturn(0);
10679566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
106828b400f6SJacob Faibussowitsch   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1069f9fb9879SHong Zhang   if (X != B) {
10709566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
107128b400f6SJacob Faibussowitsch     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1072f9fb9879SHong Zhang   }
10739566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(B, &b));
10749566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(B, &ldb));
10759566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(X, &x));
10769566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(X, &ldx));
10779371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
10789371c9d4SSatish Balay   r = rout;
10799371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
10809371c9d4SSatish Balay   c = cout;
10819bd0847aSShri Abhyankar   for (neq = 0; neq < B->cmap->n; neq++) {
10829bd0847aSShri Abhyankar     /* forward solve the lower triangular */
10839bd0847aSShri Abhyankar     tmp[0] = b[r[0]];
10849bd0847aSShri Abhyankar     v      = aa;
10859bd0847aSShri Abhyankar     vi     = aj;
10869bd0847aSShri Abhyankar     for (i = 1; i < n; i++) {
10879bd0847aSShri Abhyankar       nz  = ai[i + 1] - ai[i];
10889bd0847aSShri Abhyankar       sum = b[r[i]];
10899bd0847aSShri Abhyankar       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
10909bd0847aSShri Abhyankar       tmp[i] = sum;
10919371c9d4SSatish Balay       v += nz;
10929371c9d4SSatish Balay       vi += nz;
10939bd0847aSShri Abhyankar     }
10949bd0847aSShri Abhyankar     /* backward solve the upper triangular */
10959bd0847aSShri Abhyankar     for (i = n - 1; i >= 0; i--) {
10969bd0847aSShri Abhyankar       v   = aa + adiag[i + 1] + 1;
10979bd0847aSShri Abhyankar       vi  = aj + adiag[i + 1] + 1;
10989bd0847aSShri Abhyankar       nz  = adiag[i] - adiag[i + 1] - 1;
10999bd0847aSShri Abhyankar       sum = tmp[i];
11009bd0847aSShri Abhyankar       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
11019bd0847aSShri Abhyankar       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
11029bd0847aSShri Abhyankar     }
1103910cf402Sprj-     b += ldb;
1104910cf402Sprj-     x += ldx;
11059bd0847aSShri Abhyankar   }
11069566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
11079566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
11089566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(B, &b));
11099566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(X, &x));
11109566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
11119bd0847aSShri Abhyankar   PetscFunctionReturn(0);
11129bd0847aSShri Abhyankar }
11139bd0847aSShri Abhyankar 
11149371c9d4SSatish Balay PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx) {
1115137fb511SHong Zhang   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1116137fb511SHong Zhang   IS                 iscol = a->col, isrow = a->row;
11175d0c19d7SBarry Smith   const PetscInt    *r, *c, *rout, *cout;
11185d0c19d7SBarry Smith   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
11195d0c19d7SBarry Smith   PetscInt           nz, row;
1120fdc842d1SBarry Smith   PetscScalar       *x, *tmp, *tmps, sum;
1121fdc842d1SBarry Smith   const PetscScalar *b;
1122dd6ea824SBarry Smith   const MatScalar   *aa = a->a, *v;
1123137fb511SHong Zhang 
1124137fb511SHong Zhang   PetscFunctionBegin;
1125137fb511SHong Zhang   if (!n) PetscFunctionReturn(0);
1126137fb511SHong Zhang 
11279566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
11289566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
1129137fb511SHong Zhang   tmp = a->solve_work;
1130137fb511SHong Zhang 
11319371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
11329371c9d4SSatish Balay   r = rout;
11339371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
11349371c9d4SSatish Balay   c = cout + (n - 1);
1135137fb511SHong Zhang 
1136137fb511SHong Zhang   /* forward solve the lower triangular */
1137137fb511SHong Zhang   tmp[0] = b[*r++];
1138137fb511SHong Zhang   tmps   = tmp;
1139137fb511SHong Zhang   for (row = 1; row < n; row++) {
1140137fb511SHong Zhang     i   = rout[row]; /* permuted row */
1141137fb511SHong Zhang     v   = aa + ai[i];
1142137fb511SHong Zhang     vi  = aj + ai[i];
1143137fb511SHong Zhang     nz  = a->diag[i] - ai[i];
1144137fb511SHong Zhang     sum = b[*r++];
1145003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1146137fb511SHong Zhang     tmp[row] = sum;
1147137fb511SHong Zhang   }
1148137fb511SHong Zhang 
1149137fb511SHong Zhang   /* backward solve the upper triangular */
1150137fb511SHong Zhang   for (row = n - 1; row >= 0; row--) {
1151137fb511SHong Zhang     i   = rout[row]; /* permuted row */
1152137fb511SHong Zhang     v   = aa + a->diag[i] + 1;
1153137fb511SHong Zhang     vi  = aj + a->diag[i] + 1;
1154137fb511SHong Zhang     nz  = ai[i + 1] - a->diag[i] - 1;
1155137fb511SHong Zhang     sum = tmp[row];
1156003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1157137fb511SHong Zhang     x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1158137fb511SHong Zhang   }
1159137fb511SHong Zhang 
11609566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
11619566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
11629566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
11639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
11649566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1165137fb511SHong Zhang   PetscFunctionReturn(0);
1166137fb511SHong Zhang }
1167137fb511SHong Zhang 
1168930ae53cSBarry Smith /* ----------------------------------------------------------- */
1169c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
11709371c9d4SSatish Balay PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) {
1171930ae53cSBarry Smith   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1172d0f46423SBarry Smith   PetscInt           n  = A->rmap->n;
1173b377110cSBarry Smith   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag;
1174356650c2SBarry Smith   PetscScalar       *x;
1175356650c2SBarry Smith   const PetscScalar *b;
1176dd6ea824SBarry Smith   const MatScalar   *aa = a->a;
1177aa482453SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1178356650c2SBarry Smith   PetscInt         adiag_i, i, nz, ai_i;
1179b377110cSBarry Smith   const PetscInt  *vi;
1180dd6ea824SBarry Smith   const MatScalar *v;
1181dd6ea824SBarry Smith   PetscScalar      sum;
1182d85afc42SSatish Balay #endif
1183930ae53cSBarry Smith 
11843a40ed3dSBarry Smith   PetscFunctionBegin;
11853a40ed3dSBarry Smith   if (!n) PetscFunctionReturn(0);
1186930ae53cSBarry Smith 
11879566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
11889566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
1189930ae53cSBarry Smith 
1190aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
11911c4feecaSSatish Balay   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
11921c4feecaSSatish Balay #else
1193930ae53cSBarry Smith   /* forward solve the lower triangular */
1194930ae53cSBarry Smith   x[0] = b[0];
1195930ae53cSBarry Smith   for (i = 1; i < n; i++) {
1196930ae53cSBarry Smith     ai_i = ai[i];
1197930ae53cSBarry Smith     v    = aa + ai_i;
1198930ae53cSBarry Smith     vi   = aj + ai_i;
1199930ae53cSBarry Smith     nz   = adiag[i] - ai_i;
1200930ae53cSBarry Smith     sum  = b[i];
1201003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1202930ae53cSBarry Smith     x[i] = sum;
1203930ae53cSBarry Smith   }
1204930ae53cSBarry Smith 
1205930ae53cSBarry Smith   /* backward solve the upper triangular */
1206930ae53cSBarry Smith   for (i = n - 1; i >= 0; i--) {
1207930ae53cSBarry Smith     adiag_i = adiag[i];
1208d708749eSBarry Smith     v       = aa + adiag_i + 1;
1209d708749eSBarry Smith     vi      = aj + adiag_i + 1;
1210930ae53cSBarry Smith     nz      = ai[i + 1] - adiag_i - 1;
1211930ae53cSBarry Smith     sum     = x[i];
1212003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1213930ae53cSBarry Smith     x[i] = sum * aa[adiag_i];
1214930ae53cSBarry Smith   }
12151c4feecaSSatish Balay #endif
12169566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
12179566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
12189566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
12193a40ed3dSBarry Smith   PetscFunctionReturn(0);
1220930ae53cSBarry Smith }
1221930ae53cSBarry Smith 
12229371c9d4SSatish Balay PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx) {
1223416022c9SBarry Smith   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1224416022c9SBarry Smith   IS                 iscol = a->col, isrow = a->row;
122504fbf559SBarry Smith   PetscInt           i, n                  = A->rmap->n, j;
12265d0c19d7SBarry Smith   PetscInt           nz;
122704fbf559SBarry Smith   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
122804fbf559SBarry Smith   PetscScalar       *x, *tmp, sum;
122904fbf559SBarry Smith   const PetscScalar *b;
1230dd6ea824SBarry Smith   const MatScalar   *aa = a->a, *v;
1231da3a660dSBarry Smith 
12323a40ed3dSBarry Smith   PetscFunctionBegin;
12339566063dSJacob Faibussowitsch   if (yy != xx) PetscCall(VecCopy(yy, xx));
1234da3a660dSBarry Smith 
12359566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
12369566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
1237416022c9SBarry Smith   tmp = a->solve_work;
1238da3a660dSBarry Smith 
12399371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
12409371c9d4SSatish Balay   r = rout;
12419371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
12429371c9d4SSatish Balay   c = cout + (n - 1);
1243da3a660dSBarry Smith 
1244da3a660dSBarry Smith   /* forward solve the lower triangular */
1245da3a660dSBarry Smith   tmp[0] = b[*r++];
1246da3a660dSBarry Smith   for (i = 1; i < n; i++) {
1247010a20caSHong Zhang     v   = aa + ai[i];
1248010a20caSHong Zhang     vi  = aj + ai[i];
1249416022c9SBarry Smith     nz  = a->diag[i] - ai[i];
1250da3a660dSBarry Smith     sum = b[*r++];
125104fbf559SBarry Smith     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1252da3a660dSBarry Smith     tmp[i] = sum;
1253da3a660dSBarry Smith   }
1254da3a660dSBarry Smith 
1255da3a660dSBarry Smith   /* backward solve the upper triangular */
1256da3a660dSBarry Smith   for (i = n - 1; i >= 0; i--) {
1257010a20caSHong Zhang     v   = aa + a->diag[i] + 1;
1258010a20caSHong Zhang     vi  = aj + a->diag[i] + 1;
1259416022c9SBarry Smith     nz  = ai[i + 1] - a->diag[i] - 1;
1260da3a660dSBarry Smith     sum = tmp[i];
126104fbf559SBarry Smith     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1262010a20caSHong Zhang     tmp[i] = sum * aa[a->diag[i]];
1263da3a660dSBarry Smith     x[*c--] += tmp[i];
1264da3a660dSBarry Smith   }
1265da3a660dSBarry Smith 
12669566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
12679566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
12689566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
12699566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
12709566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
12713a40ed3dSBarry Smith   PetscFunctionReturn(0);
1272da3a660dSBarry Smith }
127304fbf559SBarry Smith 
12749371c9d4SSatish Balay PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx) {
12753c0119dfSShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
12763c0119dfSShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
12773c0119dfSShri Abhyankar   PetscInt           i, n                  = A->rmap->n, j;
12783c0119dfSShri Abhyankar   PetscInt           nz;
12793c0119dfSShri Abhyankar   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
12803c0119dfSShri Abhyankar   PetscScalar       *x, *tmp, sum;
12813c0119dfSShri Abhyankar   const PetscScalar *b;
12823c0119dfSShri Abhyankar   const MatScalar   *aa = a->a, *v;
12833c0119dfSShri Abhyankar 
12843c0119dfSShri Abhyankar   PetscFunctionBegin;
12859566063dSJacob Faibussowitsch   if (yy != xx) PetscCall(VecCopy(yy, xx));
12863c0119dfSShri Abhyankar 
12879566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
12889566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
12893c0119dfSShri Abhyankar   tmp = a->solve_work;
12903c0119dfSShri Abhyankar 
12919371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
12929371c9d4SSatish Balay   r = rout;
12939371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
12949371c9d4SSatish Balay   c = cout;
12953c0119dfSShri Abhyankar 
12963c0119dfSShri Abhyankar   /* forward solve the lower triangular */
12973c0119dfSShri Abhyankar   tmp[0] = b[r[0]];
12983c0119dfSShri Abhyankar   v      = aa;
12993c0119dfSShri Abhyankar   vi     = aj;
13003c0119dfSShri Abhyankar   for (i = 1; i < n; i++) {
13013c0119dfSShri Abhyankar     nz  = ai[i + 1] - ai[i];
13023c0119dfSShri Abhyankar     sum = b[r[i]];
13033c0119dfSShri Abhyankar     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
13043c0119dfSShri Abhyankar     tmp[i] = sum;
13052205254eSKarl Rupp     v += nz;
13062205254eSKarl Rupp     vi += nz;
13073c0119dfSShri Abhyankar   }
13083c0119dfSShri Abhyankar 
13093c0119dfSShri Abhyankar   /* backward solve the upper triangular */
13103c0119dfSShri Abhyankar   v  = aa + adiag[n - 1];
13113c0119dfSShri Abhyankar   vi = aj + adiag[n - 1];
13123c0119dfSShri Abhyankar   for (i = n - 1; i >= 0; i--) {
13133c0119dfSShri Abhyankar     nz  = adiag[i] - adiag[i + 1] - 1;
13143c0119dfSShri Abhyankar     sum = tmp[i];
13153c0119dfSShri Abhyankar     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
13163c0119dfSShri Abhyankar     tmp[i] = sum * v[nz];
13173c0119dfSShri Abhyankar     x[c[i]] += tmp[i];
13189371c9d4SSatish Balay     v += nz + 1;
13199371c9d4SSatish Balay     vi += nz + 1;
13203c0119dfSShri Abhyankar   }
13213c0119dfSShri Abhyankar 
13229566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
13239566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
13249566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
13259566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
13269566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
13273c0119dfSShri Abhyankar   PetscFunctionReturn(0);
13283c0119dfSShri Abhyankar }
13293c0119dfSShri Abhyankar 
13309371c9d4SSatish Balay PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) {
1331416022c9SBarry Smith   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
13322235e667SBarry Smith   IS                 iscol = a->col, isrow = a->row;
133304fbf559SBarry Smith   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
133404fbf559SBarry Smith   PetscInt           i, n = A->rmap->n, j;
133504fbf559SBarry Smith   PetscInt           nz;
133604fbf559SBarry Smith   PetscScalar       *x, *tmp, s1;
1337dd6ea824SBarry Smith   const MatScalar   *aa = a->a, *v;
133804fbf559SBarry Smith   const PetscScalar *b;
1339da3a660dSBarry Smith 
13403a40ed3dSBarry Smith   PetscFunctionBegin;
13419566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
13429566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
1343416022c9SBarry Smith   tmp = a->solve_work;
1344da3a660dSBarry Smith 
13459371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
13469371c9d4SSatish Balay   r = rout;
13479371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
13489371c9d4SSatish Balay   c = cout;
1349da3a660dSBarry Smith 
1350da3a660dSBarry Smith   /* copy the b into temp work space according to permutation */
13512235e667SBarry Smith   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1352da3a660dSBarry Smith 
1353da3a660dSBarry Smith   /* forward solve the U^T */
1354da3a660dSBarry Smith   for (i = 0; i < n; i++) {
1355010a20caSHong Zhang     v  = aa + diag[i];
1356010a20caSHong Zhang     vi = aj + diag[i] + 1;
1357f1af5d2fSBarry Smith     nz = ai[i + 1] - diag[i] - 1;
1358c38d4ed2SBarry Smith     s1 = tmp[i];
1359ef66eb69SBarry Smith     s1 *= (*v++); /* multiply by inverse of diagonal entry */
136004fbf559SBarry Smith     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1361c38d4ed2SBarry Smith     tmp[i] = s1;
1362da3a660dSBarry Smith   }
1363da3a660dSBarry Smith 
1364da3a660dSBarry Smith   /* backward solve the L^T */
1365da3a660dSBarry Smith   for (i = n - 1; i >= 0; i--) {
1366010a20caSHong Zhang     v  = aa + diag[i] - 1;
1367010a20caSHong Zhang     vi = aj + diag[i] - 1;
1368f1af5d2fSBarry Smith     nz = diag[i] - ai[i];
1369f1af5d2fSBarry Smith     s1 = tmp[i];
137004fbf559SBarry Smith     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1371da3a660dSBarry Smith   }
1372da3a660dSBarry Smith 
1373da3a660dSBarry Smith   /* copy tmp into x according to permutation */
1374591294e4SBarry Smith   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1375da3a660dSBarry Smith 
13769566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
13779566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
13789566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
13799566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
1380da3a660dSBarry Smith 
13819566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
13823a40ed3dSBarry Smith   PetscFunctionReturn(0);
1383da3a660dSBarry Smith }
1384da3a660dSBarry Smith 
13859371c9d4SSatish Balay PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx) {
1386d1fa9404SShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1387d1fa9404SShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
1388d1fa9404SShri Abhyankar   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1389d1fa9404SShri Abhyankar   PetscInt           i, n = A->rmap->n, j;
1390d1fa9404SShri Abhyankar   PetscInt           nz;
1391d1fa9404SShri Abhyankar   PetscScalar       *x, *tmp, s1;
1392d1fa9404SShri Abhyankar   const MatScalar   *aa = a->a, *v;
1393d1fa9404SShri Abhyankar   const PetscScalar *b;
1394d1fa9404SShri Abhyankar 
1395d1fa9404SShri Abhyankar   PetscFunctionBegin;
13969566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
13979566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
1398d1fa9404SShri Abhyankar   tmp = a->solve_work;
1399d1fa9404SShri Abhyankar 
14009371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
14019371c9d4SSatish Balay   r = rout;
14029371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
14039371c9d4SSatish Balay   c = cout;
1404d1fa9404SShri Abhyankar 
1405d1fa9404SShri Abhyankar   /* copy the b into temp work space according to permutation */
1406d1fa9404SShri Abhyankar   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1407d1fa9404SShri Abhyankar 
1408d1fa9404SShri Abhyankar   /* forward solve the U^T */
1409d1fa9404SShri Abhyankar   for (i = 0; i < n; i++) {
1410d1fa9404SShri Abhyankar     v  = aa + adiag[i + 1] + 1;
1411d1fa9404SShri Abhyankar     vi = aj + adiag[i + 1] + 1;
1412d1fa9404SShri Abhyankar     nz = adiag[i] - adiag[i + 1] - 1;
1413d1fa9404SShri Abhyankar     s1 = tmp[i];
1414d1fa9404SShri Abhyankar     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1415d1fa9404SShri Abhyankar     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1416d1fa9404SShri Abhyankar     tmp[i] = s1;
1417d1fa9404SShri Abhyankar   }
1418d1fa9404SShri Abhyankar 
1419d1fa9404SShri Abhyankar   /* backward solve the L^T */
1420d1fa9404SShri Abhyankar   for (i = n - 1; i >= 0; i--) {
1421d1fa9404SShri Abhyankar     v  = aa + ai[i];
1422d1fa9404SShri Abhyankar     vi = aj + ai[i];
1423d1fa9404SShri Abhyankar     nz = ai[i + 1] - ai[i];
1424d1fa9404SShri Abhyankar     s1 = tmp[i];
1425d1fa9404SShri Abhyankar     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1426d1fa9404SShri Abhyankar   }
1427d1fa9404SShri Abhyankar 
1428d1fa9404SShri Abhyankar   /* copy tmp into x according to permutation */
1429d1fa9404SShri Abhyankar   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1430d1fa9404SShri Abhyankar 
14319566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
14329566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
14339566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
14349566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
1435d1fa9404SShri Abhyankar 
14369566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1437d1fa9404SShri Abhyankar   PetscFunctionReturn(0);
1438d1fa9404SShri Abhyankar }
1439d1fa9404SShri Abhyankar 
14409371c9d4SSatish Balay PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx) {
1441416022c9SBarry Smith   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
14422235e667SBarry Smith   IS                 iscol = a->col, isrow = a->row;
144304fbf559SBarry Smith   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
144404fbf559SBarry Smith   PetscInt           i, n = A->rmap->n, j;
144504fbf559SBarry Smith   PetscInt           nz;
144604fbf559SBarry Smith   PetscScalar       *x, *tmp, s1;
1447dd6ea824SBarry Smith   const MatScalar   *aa = a->a, *v;
144804fbf559SBarry Smith   const PetscScalar *b;
14496abc6512SBarry Smith 
14503a40ed3dSBarry Smith   PetscFunctionBegin;
14519566063dSJacob Faibussowitsch   if (zz != xx) PetscCall(VecCopy(zz, xx));
14529566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
14539566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
1454416022c9SBarry Smith   tmp = a->solve_work;
14556abc6512SBarry Smith 
14569371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
14579371c9d4SSatish Balay   r = rout;
14589371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
14599371c9d4SSatish Balay   c = cout;
14606abc6512SBarry Smith 
14616abc6512SBarry Smith   /* copy the b into temp work space according to permutation */
14622235e667SBarry Smith   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
14636abc6512SBarry Smith 
14646abc6512SBarry Smith   /* forward solve the U^T */
14656abc6512SBarry Smith   for (i = 0; i < n; i++) {
1466010a20caSHong Zhang     v  = aa + diag[i];
1467010a20caSHong Zhang     vi = aj + diag[i] + 1;
1468f1af5d2fSBarry Smith     nz = ai[i + 1] - diag[i] - 1;
146904fbf559SBarry Smith     s1 = tmp[i];
147004fbf559SBarry Smith     s1 *= (*v++); /* multiply by inverse of diagonal entry */
147104fbf559SBarry Smith     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
147204fbf559SBarry Smith     tmp[i] = s1;
14736abc6512SBarry Smith   }
14746abc6512SBarry Smith 
14756abc6512SBarry Smith   /* backward solve the L^T */
14766abc6512SBarry Smith   for (i = n - 1; i >= 0; i--) {
1477010a20caSHong Zhang     v  = aa + diag[i] - 1;
1478010a20caSHong Zhang     vi = aj + diag[i] - 1;
1479f1af5d2fSBarry Smith     nz = diag[i] - ai[i];
148004fbf559SBarry Smith     s1 = tmp[i];
148104fbf559SBarry Smith     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
14826abc6512SBarry Smith   }
14836abc6512SBarry Smith 
14846abc6512SBarry Smith   /* copy tmp into x according to permutation */
14856abc6512SBarry Smith   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
14866abc6512SBarry Smith 
14879566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
14889566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
14899566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
14909566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
14916abc6512SBarry Smith 
14929566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
14933a40ed3dSBarry Smith   PetscFunctionReturn(0);
1494da3a660dSBarry Smith }
149504fbf559SBarry Smith 
14969371c9d4SSatish Balay PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx) {
1497533e6011SShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1498533e6011SShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
1499533e6011SShri Abhyankar   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1500533e6011SShri Abhyankar   PetscInt           i, n = A->rmap->n, j;
1501533e6011SShri Abhyankar   PetscInt           nz;
1502533e6011SShri Abhyankar   PetscScalar       *x, *tmp, s1;
1503533e6011SShri Abhyankar   const MatScalar   *aa = a->a, *v;
1504533e6011SShri Abhyankar   const PetscScalar *b;
1505533e6011SShri Abhyankar 
1506533e6011SShri Abhyankar   PetscFunctionBegin;
15079566063dSJacob Faibussowitsch   if (zz != xx) PetscCall(VecCopy(zz, xx));
15089566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
15099566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
1510533e6011SShri Abhyankar   tmp = a->solve_work;
1511533e6011SShri Abhyankar 
15129371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
15139371c9d4SSatish Balay   r = rout;
15149371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
15159371c9d4SSatish Balay   c = cout;
1516533e6011SShri Abhyankar 
1517533e6011SShri Abhyankar   /* copy the b into temp work space according to permutation */
1518533e6011SShri Abhyankar   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1519533e6011SShri Abhyankar 
1520533e6011SShri Abhyankar   /* forward solve the U^T */
1521533e6011SShri Abhyankar   for (i = 0; i < n; i++) {
1522533e6011SShri Abhyankar     v  = aa + adiag[i + 1] + 1;
1523533e6011SShri Abhyankar     vi = aj + adiag[i + 1] + 1;
1524533e6011SShri Abhyankar     nz = adiag[i] - adiag[i + 1] - 1;
1525533e6011SShri Abhyankar     s1 = tmp[i];
1526533e6011SShri Abhyankar     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1527533e6011SShri Abhyankar     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1528533e6011SShri Abhyankar     tmp[i] = s1;
1529533e6011SShri Abhyankar   }
1530533e6011SShri Abhyankar 
1531533e6011SShri Abhyankar   /* backward solve the L^T */
1532533e6011SShri Abhyankar   for (i = n - 1; i >= 0; i--) {
1533533e6011SShri Abhyankar     v  = aa + ai[i];
1534533e6011SShri Abhyankar     vi = aj + ai[i];
1535533e6011SShri Abhyankar     nz = ai[i + 1] - ai[i];
1536533e6011SShri Abhyankar     s1 = tmp[i];
1537c5e3b2a3SShri Abhyankar     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1538533e6011SShri Abhyankar   }
1539533e6011SShri Abhyankar 
1540533e6011SShri Abhyankar   /* copy tmp into x according to permutation */
1541533e6011SShri Abhyankar   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1542533e6011SShri Abhyankar 
15439566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
15449566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
15459566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
15469566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
1547533e6011SShri Abhyankar 
15489566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1549533e6011SShri Abhyankar   PetscFunctionReturn(0);
1550533e6011SShri Abhyankar }
1551533e6011SShri Abhyankar 
15529e25ed09SBarry Smith /* ----------------------------------------------------------------*/
1553b47f5a65SHong Zhang 
15548fc3a347SHong Zhang /*
15558a76ed4fSHong Zhang    ilu() under revised new data structure.
1556813a1f2bSShri Abhyankar    Factored arrays bj and ba are stored as
1557813a1f2bSShri Abhyankar      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1558813a1f2bSShri Abhyankar 
1559813a1f2bSShri Abhyankar    bi=fact->i is an array of size n+1, in which
1560baabb860SHong Zhang      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1561baabb860SHong Zhang      bi[n]:  points to L(n-1,n-1)+1
1562baabb860SHong Zhang 
1563813a1f2bSShri Abhyankar   bdiag=fact->diag is an array of size n+1,in which
1564baabb860SHong Zhang      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1565a24f213cSHong Zhang      bdiag[n]: points to entry of U(n-1,0)-1
1566813a1f2bSShri Abhyankar 
1567813a1f2bSShri Abhyankar    U(i,:) contains bdiag[i] as its last entry, i.e.,
1568813a1f2bSShri Abhyankar     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1569813a1f2bSShri Abhyankar */
15709371c9d4SSatish Balay PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) {
1571813a1f2bSShri Abhyankar   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1572bbd65245SShri Abhyankar   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1573bbd65245SShri Abhyankar   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
15741df811f5SHong Zhang   IS             isicol;
1575813a1f2bSShri Abhyankar 
1576813a1f2bSShri Abhyankar   PetscFunctionBegin;
15779566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
15789566063dSJacob Faibussowitsch   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1579813a1f2bSShri Abhyankar   b = (Mat_SeqAIJ *)(fact)->data;
1580813a1f2bSShri Abhyankar 
1581813a1f2bSShri Abhyankar   /* allocate matrix arrays for new data structure */
15829566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i));
15839566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectMemory((PetscObject)fact, ai[n] * (sizeof(PetscScalar) + sizeof(PetscInt)) + (n + 1) * sizeof(PetscInt)));
15842205254eSKarl Rupp 
1585813a1f2bSShri Abhyankar   b->singlemalloc = PETSC_TRUE;
1586813a1f2bSShri Abhyankar   if (!b->diag) {
15879566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(n + 1, &b->diag));
15889566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectMemory((PetscObject)fact, (n + 1) * sizeof(PetscInt)));
1589813a1f2bSShri Abhyankar   }
1590813a1f2bSShri Abhyankar   bdiag = b->diag;
1591813a1f2bSShri Abhyankar 
1592*48a46eb9SPierre Jolivet   if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1593813a1f2bSShri Abhyankar 
1594813a1f2bSShri Abhyankar   /* set bi and bj with new data structure */
1595813a1f2bSShri Abhyankar   bi = b->i;
1596813a1f2bSShri Abhyankar   bj = b->j;
1597813a1f2bSShri Abhyankar 
1598813a1f2bSShri Abhyankar   /* L part */
1599e60cf9a0SBarry Smith   bi[0] = 0;
1600813a1f2bSShri Abhyankar   for (i = 0; i < n; i++) {
1601813a1f2bSShri Abhyankar     nz        = adiag[i] - ai[i];
1602813a1f2bSShri Abhyankar     bi[i + 1] = bi[i] + nz;
1603813a1f2bSShri Abhyankar     aj        = a->j + ai[i];
16049371c9d4SSatish Balay     for (j = 0; j < nz; j++) { bj[k++] = aj[j]; }
1605813a1f2bSShri Abhyankar   }
1606813a1f2bSShri Abhyankar 
1607813a1f2bSShri Abhyankar   /* U part */
160862fbd6afSShri Abhyankar   bdiag[n] = bi[n] - 1;
1609813a1f2bSShri Abhyankar   for (i = n - 1; i >= 0; i--) {
1610813a1f2bSShri Abhyankar     nz = ai[i + 1] - adiag[i] - 1;
1611813a1f2bSShri Abhyankar     aj = a->j + adiag[i] + 1;
16129371c9d4SSatish Balay     for (j = 0; j < nz; j++) { bj[k++] = aj[j]; }
1613813a1f2bSShri Abhyankar     /* diag[i] */
1614bbd65245SShri Abhyankar     bj[k++]  = i;
1615a24f213cSHong Zhang     bdiag[i] = bdiag[i + 1] + nz + 1;
1616813a1f2bSShri Abhyankar   }
16171df811f5SHong Zhang 
1618d5f3da31SBarry Smith   fact->factortype             = MAT_FACTOR_ILU;
16191df811f5SHong Zhang   fact->info.factor_mallocs    = 0;
16201df811f5SHong Zhang   fact->info.fill_ratio_given  = info->fill;
16211df811f5SHong Zhang   fact->info.fill_ratio_needed = 1.0;
162235233d99SShri Abhyankar   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
16239566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
16241df811f5SHong Zhang 
16251df811f5SHong Zhang   b       = (Mat_SeqAIJ *)(fact)->data;
16261df811f5SHong Zhang   b->row  = isrow;
16271df811f5SHong Zhang   b->col  = iscol;
16281df811f5SHong Zhang   b->icol = isicol;
16299566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work));
16309566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
16319566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
1632813a1f2bSShri Abhyankar   PetscFunctionReturn(0);
1633813a1f2bSShri Abhyankar }
1634813a1f2bSShri Abhyankar 
16359371c9d4SSatish Balay PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) {
1636813a1f2bSShri Abhyankar   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1637813a1f2bSShri Abhyankar   IS                 isicol;
1638813a1f2bSShri Abhyankar   const PetscInt    *r, *ic;
16391df811f5SHong Zhang   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1640813a1f2bSShri Abhyankar   PetscInt          *bi, *cols, nnz, *cols_lvl;
1641813a1f2bSShri Abhyankar   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1642813a1f2bSShri Abhyankar   PetscInt           i, levels, diagonal_fill;
1643035f4963SHong Zhang   PetscBool          col_identity, row_identity, missing;
1644813a1f2bSShri Abhyankar   PetscReal          f;
16450298fd71SBarry Smith   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1646813a1f2bSShri Abhyankar   PetscBT            lnkbt;
1647813a1f2bSShri Abhyankar   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
16480298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
16490298fd71SBarry Smith   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1650813a1f2bSShri Abhyankar 
1651813a1f2bSShri Abhyankar   PetscFunctionBegin;
165208401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
16539566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
165428b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1655ad04f41aSHong Zhang 
1656813a1f2bSShri Abhyankar   levels = (PetscInt)info->levels;
16579566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
16589566063dSJacob Faibussowitsch   PetscCall(ISIdentity(iscol, &col_identity));
1659813a1f2bSShri Abhyankar   if (!levels && row_identity && col_identity) {
1660813a1f2bSShri Abhyankar     /* special case: ilu(0) with natural ordering */
16619566063dSJacob Faibussowitsch     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
16629371c9d4SSatish Balay     if (a->inode.size) { fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; }
1663813a1f2bSShri Abhyankar     PetscFunctionReturn(0);
1664813a1f2bSShri Abhyankar   }
1665813a1f2bSShri Abhyankar 
16669566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
16679566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
16689566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
1669813a1f2bSShri Abhyankar 
16701bfa06eaSJed Brown   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
16719566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bi));
16729566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));
1673e60cf9a0SBarry Smith   bi[0] = bdiag[0] = 0;
16749566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1675813a1f2bSShri Abhyankar 
1676813a1f2bSShri Abhyankar   /* create a linked list for storing column indices of the active row */
1677813a1f2bSShri Abhyankar   nlnk = n + 1;
16789566063dSJacob Faibussowitsch   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1679813a1f2bSShri Abhyankar 
1680813a1f2bSShri Abhyankar   /* initial FreeSpace size is f*(ai[n]+1) */
16811df811f5SHong Zhang   f             = info->fill;
16821df811f5SHong Zhang   diagonal_fill = (PetscInt)info->diagonal_fill;
16839566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1684813a1f2bSShri Abhyankar   current_space = free_space;
16859566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1686813a1f2bSShri Abhyankar   current_space_lvl = free_space_lvl;
1687813a1f2bSShri Abhyankar   for (i = 0; i < n; i++) {
1688813a1f2bSShri Abhyankar     nzi = 0;
1689813a1f2bSShri Abhyankar     /* copy current row into linked list */
1690813a1f2bSShri Abhyankar     nnz = ai[r[i] + 1] - ai[r[i]];
169128b400f6SJacob Faibussowitsch     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1692813a1f2bSShri Abhyankar     cols   = aj + ai[r[i]];
1693813a1f2bSShri Abhyankar     lnk[i] = -1; /* marker to indicate if diagonal exists */
16949566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1695813a1f2bSShri Abhyankar     nzi += nlnk;
1696813a1f2bSShri Abhyankar 
1697813a1f2bSShri Abhyankar     /* make sure diagonal entry is included */
1698813a1f2bSShri Abhyankar     if (diagonal_fill && lnk[i] == -1) {
1699813a1f2bSShri Abhyankar       fm = n;
1700813a1f2bSShri Abhyankar       while (lnk[fm] < i) fm = lnk[fm];
1701813a1f2bSShri Abhyankar       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1702813a1f2bSShri Abhyankar       lnk[fm]    = i;
1703813a1f2bSShri Abhyankar       lnk_lvl[i] = 0;
17049371c9d4SSatish Balay       nzi++;
17059371c9d4SSatish Balay       dcount++;
1706813a1f2bSShri Abhyankar     }
1707813a1f2bSShri Abhyankar 
1708813a1f2bSShri Abhyankar     /* add pivot rows into the active row */
1709813a1f2bSShri Abhyankar     nzbd = 0;
1710813a1f2bSShri Abhyankar     prow = lnk[n];
1711813a1f2bSShri Abhyankar     while (prow < i) {
1712813a1f2bSShri Abhyankar       nnz      = bdiag[prow];
1713813a1f2bSShri Abhyankar       cols     = bj_ptr[prow] + nnz + 1;
1714813a1f2bSShri Abhyankar       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1715813a1f2bSShri Abhyankar       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
17169566063dSJacob Faibussowitsch       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1717813a1f2bSShri Abhyankar       nzi += nlnk;
1718813a1f2bSShri Abhyankar       prow = lnk[prow];
1719813a1f2bSShri Abhyankar       nzbd++;
1720813a1f2bSShri Abhyankar     }
1721813a1f2bSShri Abhyankar     bdiag[i]  = nzbd;
1722813a1f2bSShri Abhyankar     bi[i + 1] = bi[i] + nzi;
1723813a1f2bSShri Abhyankar     /* if free space is not available, make more free space */
1724813a1f2bSShri Abhyankar     if (current_space->local_remaining < nzi) {
1725f91af8c7SBarry Smith       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
17269566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
17279566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1728813a1f2bSShri Abhyankar       reallocs++;
1729813a1f2bSShri Abhyankar     }
1730813a1f2bSShri Abhyankar 
1731813a1f2bSShri Abhyankar     /* copy data into free_space and free_space_lvl, then initialize lnk */
17329566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1733813a1f2bSShri Abhyankar     bj_ptr[i]    = current_space->array;
1734813a1f2bSShri Abhyankar     bjlvl_ptr[i] = current_space_lvl->array;
1735813a1f2bSShri Abhyankar 
1736813a1f2bSShri Abhyankar     /* make sure the active row i has diagonal entry */
173708401ef6SPierre Jolivet     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1738813a1f2bSShri Abhyankar 
1739813a1f2bSShri Abhyankar     current_space->array += nzi;
1740813a1f2bSShri Abhyankar     current_space->local_used += nzi;
1741813a1f2bSShri Abhyankar     current_space->local_remaining -= nzi;
1742813a1f2bSShri Abhyankar     current_space_lvl->array += nzi;
1743813a1f2bSShri Abhyankar     current_space_lvl->local_used += nzi;
1744813a1f2bSShri Abhyankar     current_space_lvl->local_remaining -= nzi;
1745813a1f2bSShri Abhyankar   }
1746813a1f2bSShri Abhyankar 
17479566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
17489566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
1749813a1f2bSShri Abhyankar   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
17509566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
17519566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1752813a1f2bSShri Abhyankar 
17539566063dSJacob Faibussowitsch   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
17549566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
17559566063dSJacob Faibussowitsch   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1756813a1f2bSShri Abhyankar 
1757813a1f2bSShri Abhyankar #if defined(PETSC_USE_INFO)
1758813a1f2bSShri Abhyankar   {
1759aef85c9fSShri Abhyankar     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
17609566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
17619566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
17629566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
17639566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "for best performance.\n"));
1764*48a46eb9SPierre Jolivet     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1765813a1f2bSShri Abhyankar   }
1766813a1f2bSShri Abhyankar #endif
1767813a1f2bSShri Abhyankar   /* put together the new matrix */
17689566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
17699566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectParent((PetscObject)fact, (PetscObject)isicol));
1770813a1f2bSShri Abhyankar   b = (Mat_SeqAIJ *)(fact)->data;
17712205254eSKarl Rupp 
1772813a1f2bSShri Abhyankar   b->free_a       = PETSC_TRUE;
1773813a1f2bSShri Abhyankar   b->free_ij      = PETSC_TRUE;
1774813a1f2bSShri Abhyankar   b->singlemalloc = PETSC_FALSE;
17752205254eSKarl Rupp 
17769566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
17772205254eSKarl Rupp 
1778813a1f2bSShri Abhyankar   b->j    = bj;
1779813a1f2bSShri Abhyankar   b->i    = bi;
1780813a1f2bSShri Abhyankar   b->diag = bdiag;
1781f4259b30SLisandro Dalcin   b->ilen = NULL;
1782f4259b30SLisandro Dalcin   b->imax = NULL;
1783813a1f2bSShri Abhyankar   b->row  = isrow;
1784813a1f2bSShri Abhyankar   b->col  = iscol;
17859566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
17869566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
1787813a1f2bSShri Abhyankar   b->icol = isicol;
17882205254eSKarl Rupp 
17899566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1790813a1f2bSShri Abhyankar   /* In b structure:  Free imax, ilen, old a, old j.
1791813a1f2bSShri Abhyankar      Allocate bdiag, solve_work, new a, new j */
17929566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectMemory((PetscObject)fact, (bdiag[0] + 1) * (sizeof(PetscInt) + sizeof(PetscScalar))));
1793f268cba8SShri Abhyankar   b->maxnz = b->nz = bdiag[0] + 1;
17942205254eSKarl Rupp 
1795813a1f2bSShri Abhyankar   (fact)->info.factor_mallocs    = reallocs;
1796813a1f2bSShri Abhyankar   (fact)->info.fill_ratio_given  = f;
1797f268cba8SShri Abhyankar   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
179835233d99SShri Abhyankar   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
17999371c9d4SSatish Balay   if (a->inode.size) { (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; }
18009566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1801813a1f2bSShri Abhyankar   PetscFunctionReturn(0);
1802813a1f2bSShri Abhyankar }
1803813a1f2bSShri Abhyankar 
18049371c9d4SSatish Balay PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) {
18056516239fSHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
18066516239fSHong Zhang   IS                 isicol;
18076516239fSHong Zhang   const PetscInt    *r, *ic;
1808ece542f9SHong Zhang   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
18096516239fSHong Zhang   PetscInt          *bi, *cols, nnz, *cols_lvl;
18106516239fSHong Zhang   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
18116516239fSHong Zhang   PetscInt           i, levels, diagonal_fill;
1812ace3abfcSBarry Smith   PetscBool          col_identity, row_identity;
18136516239fSHong Zhang   PetscReal          f;
18140298fd71SBarry Smith   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
18156516239fSHong Zhang   PetscBT            lnkbt;
18166516239fSHong Zhang   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
18170298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
18180298fd71SBarry Smith   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1819ace3abfcSBarry Smith   PetscBool          missing;
18206516239fSHong Zhang 
18216516239fSHong Zhang   PetscFunctionBegin;
182208401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
18239566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
182428b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1825ece542f9SHong Zhang 
18266516239fSHong Zhang   f             = info->fill;
18276516239fSHong Zhang   levels        = (PetscInt)info->levels;
18286516239fSHong Zhang   diagonal_fill = (PetscInt)info->diagonal_fill;
18292205254eSKarl Rupp 
18309566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
18316516239fSHong Zhang 
18329566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
18339566063dSJacob Faibussowitsch   PetscCall(ISIdentity(iscol, &col_identity));
18346516239fSHong Zhang   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
18359566063dSJacob Faibussowitsch     PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE));
18362205254eSKarl Rupp 
1837ad04f41aSHong Zhang     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
18389371c9d4SSatish Balay     if (a->inode.size) { (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; }
1839d5f3da31SBarry Smith     fact->factortype               = MAT_FACTOR_ILU;
1840719d5645SBarry Smith     (fact)->info.factor_mallocs    = 0;
1841719d5645SBarry Smith     (fact)->info.fill_ratio_given  = info->fill;
1842719d5645SBarry Smith     (fact)->info.fill_ratio_needed = 1.0;
18432205254eSKarl Rupp 
1844719d5645SBarry Smith     b       = (Mat_SeqAIJ *)(fact)->data;
184508480c60SBarry Smith     b->row  = isrow;
184608480c60SBarry Smith     b->col  = iscol;
184782bf6240SBarry Smith     b->icol = isicol;
18489566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work));
18499566063dSJacob Faibussowitsch     PetscCall(PetscObjectReference((PetscObject)isrow));
18509566063dSJacob Faibussowitsch     PetscCall(PetscObjectReference((PetscObject)iscol));
18513a40ed3dSBarry Smith     PetscFunctionReturn(0);
185208480c60SBarry Smith   }
185308480c60SBarry Smith 
18549566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
18559566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
18569e25ed09SBarry Smith 
18571bfa06eaSJed Brown   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
18589566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bi));
18599566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));
1860a6df321fSShri Abhyankar   bi[0] = bdiag[0] = 0;
18616cf997b0SBarry Smith 
18629566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
18635a8e39fbSHong Zhang 
18645a8e39fbSHong Zhang   /* create a linked list for storing column indices of the active row */
18655a8e39fbSHong Zhang   nlnk = n + 1;
18669566063dSJacob Faibussowitsch   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
18675a8e39fbSHong Zhang 
18685a8e39fbSHong Zhang   /* initial FreeSpace size is f*(ai[n]+1) */
18699566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
18705a8e39fbSHong Zhang   current_space = free_space;
18719566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
18725a8e39fbSHong Zhang   current_space_lvl = free_space_lvl;
18735a8e39fbSHong Zhang 
18745a8e39fbSHong Zhang   for (i = 0; i < n; i++) {
18755a8e39fbSHong Zhang     nzi = 0;
18766cf997b0SBarry Smith     /* copy current row into linked list */
18775a8e39fbSHong Zhang     nnz = ai[r[i] + 1] - ai[r[i]];
187828b400f6SJacob Faibussowitsch     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
18795a8e39fbSHong Zhang     cols   = aj + ai[r[i]];
18805a8e39fbSHong Zhang     lnk[i] = -1; /* marker to indicate if diagonal exists */
18819566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
18825a8e39fbSHong Zhang     nzi += nlnk;
18836cf997b0SBarry Smith 
18846cf997b0SBarry Smith     /* make sure diagonal entry is included */
18855a8e39fbSHong Zhang     if (diagonal_fill && lnk[i] == -1) {
18866cf997b0SBarry Smith       fm = n;
18875a8e39fbSHong Zhang       while (lnk[fm] < i) fm = lnk[fm];
18885a8e39fbSHong Zhang       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
18895a8e39fbSHong Zhang       lnk[fm]    = i;
18905a8e39fbSHong Zhang       lnk_lvl[i] = 0;
18919371c9d4SSatish Balay       nzi++;
18929371c9d4SSatish Balay       dcount++;
18936cf997b0SBarry Smith     }
18946cf997b0SBarry Smith 
18955a8e39fbSHong Zhang     /* add pivot rows into the active row */
18965a8e39fbSHong Zhang     nzbd = 0;
18975a8e39fbSHong Zhang     prow = lnk[n];
18985a8e39fbSHong Zhang     while (prow < i) {
18995a8e39fbSHong Zhang       nnz      = bdiag[prow];
19005a8e39fbSHong Zhang       cols     = bj_ptr[prow] + nnz + 1;
19015a8e39fbSHong Zhang       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
19025a8e39fbSHong Zhang       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
19039566063dSJacob Faibussowitsch       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
19045a8e39fbSHong Zhang       nzi += nlnk;
19055a8e39fbSHong Zhang       prow = lnk[prow];
19065a8e39fbSHong Zhang       nzbd++;
190756beaf04SBarry Smith     }
19085a8e39fbSHong Zhang     bdiag[i]  = nzbd;
19095a8e39fbSHong Zhang     bi[i + 1] = bi[i] + nzi;
1910ecf371e4SBarry Smith 
19115a8e39fbSHong Zhang     /* if free space is not available, make more free space */
19125a8e39fbSHong Zhang     if (current_space->local_remaining < nzi) {
1913f91af8c7SBarry Smith       nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
19149566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
19159566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
19165a8e39fbSHong Zhang       reallocs++;
19175a8e39fbSHong Zhang     }
1918ecf371e4SBarry Smith 
19195a8e39fbSHong Zhang     /* copy data into free_space and free_space_lvl, then initialize lnk */
19209566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
19215a8e39fbSHong Zhang     bj_ptr[i]    = current_space->array;
19225a8e39fbSHong Zhang     bjlvl_ptr[i] = current_space_lvl->array;
19235a8e39fbSHong Zhang 
19245a8e39fbSHong Zhang     /* make sure the active row i has diagonal entry */
192508401ef6SPierre Jolivet     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
19265a8e39fbSHong Zhang 
19275a8e39fbSHong Zhang     current_space->array += nzi;
19285a8e39fbSHong Zhang     current_space->local_used += nzi;
19295a8e39fbSHong Zhang     current_space->local_remaining -= nzi;
19305a8e39fbSHong Zhang     current_space_lvl->array += nzi;
19315a8e39fbSHong Zhang     current_space_lvl->local_used += nzi;
19325a8e39fbSHong Zhang     current_space_lvl->local_remaining -= nzi;
19339e25ed09SBarry Smith   }
19345a8e39fbSHong Zhang 
19359566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
19369566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
19375a8e39fbSHong Zhang 
19385a8e39fbSHong Zhang   /* destroy list of free space and other temporary arrays */
19399566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
19409566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */
19419566063dSJacob Faibussowitsch   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
19429566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
19439566063dSJacob Faibussowitsch   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
19449e25ed09SBarry Smith 
19456cf91177SBarry Smith #if defined(PETSC_USE_INFO)
1946f64065bbSBarry Smith   {
19475a8e39fbSHong Zhang     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
19489566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
19499566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
19509566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
19519566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "for best performance.\n"));
1952*48a46eb9SPierre Jolivet     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1953f64065bbSBarry Smith   }
195463ba0a88SBarry Smith #endif
1955bbb6d6a8SBarry Smith 
19569e25ed09SBarry Smith   /* put together the new matrix */
19579566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
19589566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectParent((PetscObject)fact, (PetscObject)isicol));
1959719d5645SBarry Smith   b = (Mat_SeqAIJ *)(fact)->data;
19602205254eSKarl Rupp 
1961e6b907acSBarry Smith   b->free_a       = PETSC_TRUE;
1962e6b907acSBarry Smith   b->free_ij      = PETSC_TRUE;
19637c922b88SBarry Smith   b->singlemalloc = PETSC_FALSE;
19642205254eSKarl Rupp 
19659566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[n], &b->a));
19665a8e39fbSHong Zhang   b->j = bj;
19675a8e39fbSHong Zhang   b->i = bi;
19685a8e39fbSHong Zhang   for (i = 0; i < n; i++) bdiag[i] += bi[i];
19695a8e39fbSHong Zhang   b->diag = bdiag;
1970f4259b30SLisandro Dalcin   b->ilen = NULL;
1971f4259b30SLisandro Dalcin   b->imax = NULL;
1972416022c9SBarry Smith   b->row  = isrow;
1973416022c9SBarry Smith   b->col  = iscol;
19749566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
19759566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
197682bf6240SBarry Smith   b->icol = isicol;
19779566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1978416022c9SBarry Smith   /* In b structure:  Free imax, ilen, old a, old j.
19795a8e39fbSHong Zhang      Allocate bdiag, solve_work, new a, new j */
19809566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectMemory((PetscObject)fact, (bi[n] - n) * (sizeof(PetscInt) + sizeof(PetscScalar))));
19815a8e39fbSHong Zhang   b->maxnz = b->nz = bi[n];
19822205254eSKarl Rupp 
1983719d5645SBarry Smith   (fact)->info.factor_mallocs    = reallocs;
1984719d5645SBarry Smith   (fact)->info.fill_ratio_given  = f;
1985719d5645SBarry Smith   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
1986ad04f41aSHong Zhang   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ_inplace;
19879371c9d4SSatish Balay   if (a->inode.size) { (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; }
19883a40ed3dSBarry Smith   PetscFunctionReturn(0);
19899e25ed09SBarry Smith }
1990d5d45c9bSBarry Smith 
19919371c9d4SSatish Balay PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) {
19925f44c854SHong Zhang   Mat             C  = B;
19935f44c854SHong Zhang   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
19945f44c854SHong Zhang   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
19955f44c854SHong Zhang   IS              ip = b->row, iip = b->icol;
19965f44c854SHong Zhang   const PetscInt *rip, *riip;
19975f44c854SHong Zhang   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
19985f44c854SHong Zhang   PetscInt       *ai = a->i, *aj = a->j;
19995f44c854SHong Zhang   PetscInt        k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
20005f44c854SHong Zhang   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2001ace3abfcSBarry Smith   PetscBool       perm_identity;
2002d90ac83dSHong Zhang   FactorShiftCtx  sctx;
200398a93161SHong Zhang   PetscReal       rs;
200498a93161SHong Zhang   MatScalar       d, *v;
200598a93161SHong Zhang 
20065f44c854SHong Zhang   PetscFunctionBegin;
200798a93161SHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
20089566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
200998a93161SHong Zhang 
2010f4db908eSBarry Smith   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
201198a93161SHong Zhang     sctx.shift_top = info->zeropivot;
201298a93161SHong Zhang     for (i = 0; i < mbs; i++) {
201398a93161SHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
201498a93161SHong Zhang       d  = (aa)[a->diag[i]];
201598a93161SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
201698a93161SHong Zhang       v  = aa + ai[i];
201798a93161SHong Zhang       nz = ai[i + 1] - ai[i];
20182205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
201998a93161SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
202098a93161SHong Zhang     }
202198a93161SHong Zhang     sctx.shift_top *= 1.1;
202298a93161SHong Zhang     sctx.nshift_max = 5;
202398a93161SHong Zhang     sctx.shift_lo   = 0.;
202498a93161SHong Zhang     sctx.shift_hi   = 1.;
202598a93161SHong Zhang   }
20265f44c854SHong Zhang 
20279566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(ip, &rip));
20289566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iip, &riip));
20295f44c854SHong Zhang 
20305f44c854SHong Zhang   /* allocate working arrays
20315f44c854SHong Zhang      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
20325f44c854SHong Zhang      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
20335f44c854SHong Zhang   */
20349566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
20355f44c854SHong Zhang 
203698a93161SHong Zhang   do {
203707b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
203898a93161SHong Zhang 
2039c5546cabSHong Zhang     for (i = 0; i < mbs; i++) c2r[i] = mbs;
20402e987568SSatish Balay     if (mbs) il[0] = 0;
20415f44c854SHong Zhang 
20425f44c854SHong Zhang     for (k = 0; k < mbs; k++) {
20435f44c854SHong Zhang       /* zero rtmp */
20445f44c854SHong Zhang       nz    = bi[k + 1] - bi[k];
20455f44c854SHong Zhang       bjtmp = bj + bi[k];
20465f44c854SHong Zhang       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
20475f44c854SHong Zhang 
20485f44c854SHong Zhang       /* load in initial unfactored row */
20495f44c854SHong Zhang       bval = ba + bi[k];
20509371c9d4SSatish Balay       jmin = ai[rip[k]];
20519371c9d4SSatish Balay       jmax = ai[rip[k] + 1];
20525f44c854SHong Zhang       for (j = jmin; j < jmax; j++) {
20535f44c854SHong Zhang         col = riip[aj[j]];
20545f44c854SHong Zhang         if (col >= k) { /* only take upper triangular entry */
20555f44c854SHong Zhang           rtmp[col] = aa[j];
20565f44c854SHong Zhang           *bval++   = 0.0; /* for in-place factorization */
20575f44c854SHong Zhang         }
20585f44c854SHong Zhang       }
205998a93161SHong Zhang       /* shift the diagonal of the matrix: ZeropivotApply() */
206098a93161SHong Zhang       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
20615f44c854SHong Zhang 
20625f44c854SHong Zhang       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
20635f44c854SHong Zhang       dk = rtmp[k];
20645f44c854SHong Zhang       i  = c2r[k]; /* first row to be added to k_th row  */
20655f44c854SHong Zhang 
20665f44c854SHong Zhang       while (i < k) {
20675f44c854SHong Zhang         nexti = c2r[i]; /* next row to be added to k_th row */
20685f44c854SHong Zhang 
20695f44c854SHong Zhang         /* compute multiplier, update diag(k) and U(i,k) */
20705f44c854SHong Zhang         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
20715f44c854SHong Zhang         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
20725f44c854SHong Zhang         dk += uikdi * ba[ili];           /* update diag[k] */
20735f44c854SHong Zhang         ba[ili] = uikdi;                 /* -U(i,k) */
20745f44c854SHong Zhang 
20755f44c854SHong Zhang         /* add multiple of row i to k-th row */
20769371c9d4SSatish Balay         jmin = ili + 1;
20779371c9d4SSatish Balay         jmax = bi[i + 1];
20785f44c854SHong Zhang         if (jmin < jmax) {
20795f44c854SHong Zhang           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
20805f44c854SHong Zhang           /* update il and c2r for row i */
20815f44c854SHong Zhang           il[i]  = jmin;
20829371c9d4SSatish Balay           j      = bj[jmin];
20839371c9d4SSatish Balay           c2r[i] = c2r[j];
20849371c9d4SSatish Balay           c2r[j] = i;
20855f44c854SHong Zhang         }
20865f44c854SHong Zhang         i = nexti;
20875f44c854SHong Zhang       }
20885f44c854SHong Zhang 
20895f44c854SHong Zhang       /* copy data into U(k,:) */
209098a93161SHong Zhang       rs   = 0.0;
20919371c9d4SSatish Balay       jmin = bi[k];
20929371c9d4SSatish Balay       jmax = bi[k + 1] - 1;
20935f44c854SHong Zhang       if (jmin < jmax) {
20945f44c854SHong Zhang         for (j = jmin; j < jmax; j++) {
20959371c9d4SSatish Balay           col   = bj[j];
20969371c9d4SSatish Balay           ba[j] = rtmp[col];
20979371c9d4SSatish Balay           rs += PetscAbsScalar(ba[j]);
20985f44c854SHong Zhang         }
20995f44c854SHong Zhang         /* add the k-th row into il and c2r */
21005f44c854SHong Zhang         il[k]  = jmin;
21019371c9d4SSatish Balay         i      = bj[jmin];
21029371c9d4SSatish Balay         c2r[k] = c2r[i];
21039371c9d4SSatish Balay         c2r[i] = k;
21045f44c854SHong Zhang       }
210598a93161SHong Zhang 
210698a93161SHong Zhang       /* MatPivotCheck() */
210798a93161SHong Zhang       sctx.rs = rs;
210898a93161SHong Zhang       sctx.pv = dk;
21099566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
211007b50cabSHong Zhang       if (sctx.newshift) break;
211198a93161SHong Zhang       dk = sctx.pv;
211298a93161SHong Zhang 
211398a93161SHong Zhang       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
211498a93161SHong Zhang     }
211507b50cabSHong Zhang   } while (sctx.newshift);
21165f44c854SHong Zhang 
21179566063dSJacob Faibussowitsch   PetscCall(PetscFree3(rtmp, il, c2r));
21189566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(ip, &rip));
21199566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iip, &riip));
21205f44c854SHong Zhang 
21219566063dSJacob Faibussowitsch   PetscCall(ISIdentity(ip, &perm_identity));
21225f44c854SHong Zhang   if (perm_identity) {
212335233d99SShri Abhyankar     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
212435233d99SShri Abhyankar     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
21252169352eSHong Zhang     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
21262169352eSHong Zhang     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
21275f44c854SHong Zhang   } else {
212835233d99SShri Abhyankar     B->ops->solve          = MatSolve_SeqSBAIJ_1;
212935233d99SShri Abhyankar     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
213035233d99SShri Abhyankar     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
213135233d99SShri Abhyankar     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
21325f44c854SHong Zhang   }
21335f44c854SHong Zhang 
21345f44c854SHong Zhang   C->assembled    = PETSC_TRUE;
21355f44c854SHong Zhang   C->preallocated = PETSC_TRUE;
21362205254eSKarl Rupp 
21379566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->rmap->n));
213898a93161SHong Zhang 
213998a93161SHong Zhang   /* MatPivotView() */
214098a93161SHong Zhang   if (sctx.nshift) {
2141f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
21429566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
2143f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
21449566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2145f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
21469566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
214798a93161SHong Zhang     }
214898a93161SHong Zhang   }
21495f44c854SHong Zhang   PetscFunctionReturn(0);
21505f44c854SHong Zhang }
21515f44c854SHong Zhang 
21529371c9d4SSatish Balay PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) {
2153719d5645SBarry Smith   Mat             C  = B;
21540968510aSHong Zhang   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
21552ed133dbSHong Zhang   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
21569bfd6278SHong Zhang   IS              ip = b->row, iip = b->icol;
21575d0c19d7SBarry Smith   const PetscInt *rip, *riip;
2158c5546cabSHong Zhang   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
21592ed133dbSHong Zhang   PetscInt       *ai = a->i, *aj = a->j;
21602ed133dbSHong Zhang   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2161622e2028SHong Zhang   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2162ace3abfcSBarry Smith   PetscBool       perm_identity;
21630e95ead3SHong Zhang   FactorShiftCtx  sctx;
21640e95ead3SHong Zhang   PetscReal       rs;
21650e95ead3SHong Zhang   MatScalar       d, *v;
2166d5d45c9bSBarry Smith 
2167a6175056SHong Zhang   PetscFunctionBegin;
21680e95ead3SHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
21699566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
21700e95ead3SHong Zhang 
21710e95ead3SHong Zhang   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
21720e95ead3SHong Zhang     sctx.shift_top = info->zeropivot;
21730e95ead3SHong Zhang     for (i = 0; i < mbs; i++) {
21740e95ead3SHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
21750e95ead3SHong Zhang       d  = (aa)[a->diag[i]];
21760e95ead3SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
21770e95ead3SHong Zhang       v  = aa + ai[i];
21780e95ead3SHong Zhang       nz = ai[i + 1] - ai[i];
21792205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
21800e95ead3SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
21810e95ead3SHong Zhang     }
21820e95ead3SHong Zhang     sctx.shift_top *= 1.1;
21830e95ead3SHong Zhang     sctx.nshift_max = 5;
21840e95ead3SHong Zhang     sctx.shift_lo   = 0.;
21850e95ead3SHong Zhang     sctx.shift_hi   = 1.;
21860e95ead3SHong Zhang   }
2187ee45ca4aSHong Zhang 
21889566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(ip, &rip));
21899566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iip, &riip));
21902ed133dbSHong Zhang 
21912ed133dbSHong Zhang   /* initialization */
21929566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
21930e95ead3SHong Zhang 
21942ed133dbSHong Zhang   do {
219507b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
21960e95ead3SHong Zhang 
2197c5546cabSHong Zhang     for (i = 0; i < mbs; i++) jl[i] = mbs;
2198c5546cabSHong Zhang     il[0] = 0;
21992ed133dbSHong Zhang 
22002ed133dbSHong Zhang     for (k = 0; k < mbs; k++) {
2201c5546cabSHong Zhang       /* zero rtmp */
2202c5546cabSHong Zhang       nz    = bi[k + 1] - bi[k];
2203c5546cabSHong Zhang       bjtmp = bj + bi[k];
2204c5546cabSHong Zhang       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2205c5546cabSHong Zhang 
2206c05c3958SHong Zhang       bval = ba + bi[k];
22072ed133dbSHong Zhang       /* initialize k-th row by the perm[k]-th row of A */
22089371c9d4SSatish Balay       jmin = ai[rip[k]];
22099371c9d4SSatish Balay       jmax = ai[rip[k] + 1];
22102ed133dbSHong Zhang       for (j = jmin; j < jmax; j++) {
22119bfd6278SHong Zhang         col = riip[aj[j]];
22122ed133dbSHong Zhang         if (col >= k) { /* only take upper triangular entry */
22132ed133dbSHong Zhang           rtmp[col] = aa[j];
2214c05c3958SHong Zhang           *bval++   = 0.0; /* for in-place factorization */
22152ed133dbSHong Zhang         }
22162ed133dbSHong Zhang       }
221739e3d630SHong Zhang       /* shift the diagonal of the matrix */
2218540703acSHong Zhang       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
22192ed133dbSHong Zhang 
22202ed133dbSHong Zhang       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
22212ed133dbSHong Zhang       dk = rtmp[k];
22222ed133dbSHong Zhang       i  = jl[k]; /* first row to be added to k_th row  */
22232ed133dbSHong Zhang 
22242ed133dbSHong Zhang       while (i < k) {
22252ed133dbSHong Zhang         nexti = jl[i]; /* next row to be added to k_th row */
22262ed133dbSHong Zhang 
22272ed133dbSHong Zhang         /* compute multiplier, update diag(k) and U(i,k) */
22282ed133dbSHong Zhang         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
22292ed133dbSHong Zhang         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
22302ed133dbSHong Zhang         dk += uikdi * ba[ili];
22312ed133dbSHong Zhang         ba[ili] = uikdi; /* -U(i,k) */
22322ed133dbSHong Zhang 
22332ed133dbSHong Zhang         /* add multiple of row i to k-th row */
22349371c9d4SSatish Balay         jmin = ili + 1;
22359371c9d4SSatish Balay         jmax = bi[i + 1];
22362ed133dbSHong Zhang         if (jmin < jmax) {
22372ed133dbSHong Zhang           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
22382ed133dbSHong Zhang           /* update il and jl for row i */
22392ed133dbSHong Zhang           il[i] = jmin;
22409371c9d4SSatish Balay           j     = bj[jmin];
22419371c9d4SSatish Balay           jl[i] = jl[j];
22429371c9d4SSatish Balay           jl[j] = i;
22432ed133dbSHong Zhang         }
22442ed133dbSHong Zhang         i = nexti;
22452ed133dbSHong Zhang       }
22462ed133dbSHong Zhang 
22470697b6caSHong Zhang       /* shift the diagonals when zero pivot is detected */
22480697b6caSHong Zhang       /* compute rs=sum of abs(off-diagonal) */
22490697b6caSHong Zhang       rs   = 0.0;
22503c9e8598SHong Zhang       jmin = bi[k] + 1;
22513c9e8598SHong Zhang       nz   = bi[k + 1] - jmin;
22523c9e8598SHong Zhang       bcol = bj + jmin;
22539371c9d4SSatish Balay       for (j = 0; j < nz; j++) { rs += PetscAbsScalar(rtmp[bcol[j]]); }
2254540703acSHong Zhang 
2255540703acSHong Zhang       sctx.rs = rs;
2256540703acSHong Zhang       sctx.pv = dk;
22579566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
225807b50cabSHong Zhang       if (sctx.newshift) break;
22590e95ead3SHong Zhang       dk = sctx.pv;
22603c9e8598SHong Zhang 
22613c9e8598SHong Zhang       /* copy data into U(k,:) */
226239e3d630SHong Zhang       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
22639371c9d4SSatish Balay       jmin      = bi[k] + 1;
22649371c9d4SSatish Balay       jmax      = bi[k + 1];
226539e3d630SHong Zhang       if (jmin < jmax) {
226639e3d630SHong Zhang         for (j = jmin; j < jmax; j++) {
22679371c9d4SSatish Balay           col   = bj[j];
22689371c9d4SSatish Balay           ba[j] = rtmp[col];
22693c9e8598SHong Zhang         }
227039e3d630SHong Zhang         /* add the k-th row into il and jl */
22713c9e8598SHong Zhang         il[k] = jmin;
22729371c9d4SSatish Balay         i     = bj[jmin];
22739371c9d4SSatish Balay         jl[k] = jl[i];
22749371c9d4SSatish Balay         jl[i] = k;
22753c9e8598SHong Zhang       }
22763c9e8598SHong Zhang     }
227707b50cabSHong Zhang   } while (sctx.newshift);
22780e95ead3SHong Zhang 
22799566063dSJacob Faibussowitsch   PetscCall(PetscFree3(rtmp, il, jl));
22809566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(ip, &rip));
22819566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iip, &riip));
2282db4efbfdSBarry Smith 
22839566063dSJacob Faibussowitsch   PetscCall(ISIdentity(ip, &perm_identity));
2284db4efbfdSBarry Smith   if (perm_identity) {
22850a3c351aSHong Zhang     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
22860a3c351aSHong Zhang     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
22870a3c351aSHong Zhang     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
22880a3c351aSHong Zhang     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2289db4efbfdSBarry Smith   } else {
22900a3c351aSHong Zhang     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
22910a3c351aSHong Zhang     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
22920a3c351aSHong Zhang     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
22930a3c351aSHong Zhang     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2294db4efbfdSBarry Smith   }
2295db4efbfdSBarry Smith 
22963c9e8598SHong Zhang   C->assembled    = PETSC_TRUE;
22973c9e8598SHong Zhang   C->preallocated = PETSC_TRUE;
22982205254eSKarl Rupp 
22999566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->rmap->n));
2300540703acSHong Zhang   if (sctx.nshift) {
2301f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
23029566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2303f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
23049566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
23050697b6caSHong Zhang     }
23063c9e8598SHong Zhang   }
23073c9e8598SHong Zhang   PetscFunctionReturn(0);
23083c9e8598SHong Zhang }
2309a6175056SHong Zhang 
23108a76ed4fSHong Zhang /*
23118a76ed4fSHong Zhang    icc() under revised new data structure.
23128a76ed4fSHong Zhang    Factored arrays bj and ba are stored as
23138a76ed4fSHong Zhang      U(0,:),...,U(i,:),U(n-1,:)
23148a76ed4fSHong Zhang 
23158a76ed4fSHong Zhang    ui=fact->i is an array of size n+1, in which
23168a76ed4fSHong Zhang    ui+
23178a76ed4fSHong Zhang      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
23188a76ed4fSHong Zhang      ui[n]:  points to U(n-1,n-1)+1
23198a76ed4fSHong Zhang 
23208a76ed4fSHong Zhang   udiag=fact->diag is an array of size n,in which
23218a76ed4fSHong Zhang      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
23228a76ed4fSHong Zhang 
23238a76ed4fSHong Zhang    U(i,:) contains udiag[i] as its last entry, i.e.,
23248a76ed4fSHong Zhang     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
23258a76ed4fSHong Zhang */
23268a76ed4fSHong Zhang 
23279371c9d4SSatish Balay PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) {
23280968510aSHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2329ed59401aSHong Zhang   Mat_SeqSBAIJ      *b;
2330ace3abfcSBarry Smith   PetscBool          perm_identity, missing;
23315f44c854SHong Zhang   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
23325d0c19d7SBarry Smith   const PetscInt    *rip, *riip;
2333ed59401aSHong Zhang   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
23340298fd71SBarry Smith   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
23355a8e39fbSHong Zhang   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2336ed59401aSHong Zhang   PetscReal          fill = info->fill, levels = info->levels;
23370298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
23380298fd71SBarry Smith   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
23397a48dd6fSHong Zhang   PetscBT            lnkbt;
2340b635d36bSHong Zhang   IS                 iperm;
2341a6175056SHong Zhang 
2342a6175056SHong Zhang   PetscFunctionBegin;
234308401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
23449566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &d));
234528b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
23469566063dSJacob Faibussowitsch   PetscCall(ISIdentity(perm, &perm_identity));
23479566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
23487a48dd6fSHong Zhang 
23499566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &ui));
23509566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &udiag));
235139e3d630SHong Zhang   ui[0] = 0;
235239e3d630SHong Zhang 
23538a76ed4fSHong Zhang   /* ICC(0) without matrix ordering: simply rearrange column indices */
2354622e2028SHong Zhang   if (!levels && perm_identity) {
23555f44c854SHong Zhang     for (i = 0; i < am; i++) {
2356c5546cabSHong Zhang       ncols     = ai[i + 1] - a->diag[i];
2357c5546cabSHong Zhang       ui[i + 1] = ui[i] + ncols;
2358c5546cabSHong Zhang       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2359dc1e2a3fSHong Zhang     }
23609566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2361dc1e2a3fSHong Zhang     cols = uj;
2362dc1e2a3fSHong Zhang     for (i = 0; i < am; i++) {
23635f44c854SHong Zhang       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2364dc1e2a3fSHong Zhang       ncols = ai[i + 1] - a->diag[i] - 1;
23655f44c854SHong Zhang       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2366910cf402Sprj-       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
23675f44c854SHong Zhang     }
23685f44c854SHong Zhang   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
23699566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(iperm, &riip));
23709566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(perm, &rip));
23715f44c854SHong Zhang 
23725f44c854SHong Zhang     /* initialization */
23739566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(am + 1, &ajtmp));
23745f44c854SHong Zhang 
23755f44c854SHong Zhang     /* jl: linked list for storing indices of the pivot rows
23765f44c854SHong Zhang        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
23779566063dSJacob Faibussowitsch     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
23785f44c854SHong Zhang     for (i = 0; i < am; i++) {
23799371c9d4SSatish Balay       jl[i] = am;
23809371c9d4SSatish Balay       il[i] = 0;
23815f44c854SHong Zhang     }
23825f44c854SHong Zhang 
23835f44c854SHong Zhang     /* create and initialize a linked list for storing column indices of the active row k */
23845f44c854SHong Zhang     nlnk = am + 1;
23859566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
23865f44c854SHong Zhang 
238795b5ac22SHong Zhang     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
23889566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
23895f44c854SHong Zhang     current_space = free_space;
23909566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
23915f44c854SHong Zhang     current_space_lvl = free_space_lvl;
23925f44c854SHong Zhang 
23935f44c854SHong Zhang     for (k = 0; k < am; k++) { /* for each active row k */
23945f44c854SHong Zhang       /* initialize lnk by the column indices of row rip[k] of A */
23955f44c854SHong Zhang       nzk   = 0;
23965f44c854SHong Zhang       ncols = ai[rip[k] + 1] - ai[rip[k]];
239728b400f6SJacob Faibussowitsch       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
23985f44c854SHong Zhang       ncols_upper = 0;
23995f44c854SHong Zhang       for (j = 0; j < ncols; j++) {
24005f44c854SHong Zhang         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
24015f44c854SHong Zhang         if (riip[i] >= k) {         /* only take upper triangular entry */
24025f44c854SHong Zhang           ajtmp[ncols_upper] = i;
24035f44c854SHong Zhang           ncols_upper++;
24045f44c854SHong Zhang         }
24055f44c854SHong Zhang       }
24069566063dSJacob Faibussowitsch       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
24075f44c854SHong Zhang       nzk += nlnk;
24085f44c854SHong Zhang 
24095f44c854SHong Zhang       /* update lnk by computing fill-in for each pivot row to be merged in */
24105f44c854SHong Zhang       prow = jl[k]; /* 1st pivot row */
24115f44c854SHong Zhang 
24125f44c854SHong Zhang       while (prow < k) {
24135f44c854SHong Zhang         nextprow = jl[prow];
24145f44c854SHong Zhang 
24155f44c854SHong Zhang         /* merge prow into k-th row */
24165f44c854SHong Zhang         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
24175f44c854SHong Zhang         jmax  = ui[prow + 1];
24185f44c854SHong Zhang         ncols = jmax - jmin;
24195f44c854SHong Zhang         i     = jmin - ui[prow];
24205f44c854SHong Zhang         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
24215f44c854SHong Zhang         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
24225f44c854SHong Zhang         j     = *(uj - 1);
24239566063dSJacob Faibussowitsch         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
24245f44c854SHong Zhang         nzk += nlnk;
24255f44c854SHong Zhang 
24265f44c854SHong Zhang         /* update il and jl for prow */
24275f44c854SHong Zhang         if (jmin < jmax) {
24285f44c854SHong Zhang           il[prow] = jmin;
24299371c9d4SSatish Balay           j        = *cols;
24309371c9d4SSatish Balay           jl[prow] = jl[j];
24319371c9d4SSatish Balay           jl[j]    = prow;
24325f44c854SHong Zhang         }
24335f44c854SHong Zhang         prow = nextprow;
24345f44c854SHong Zhang       }
24355f44c854SHong Zhang 
24365f44c854SHong Zhang       /* if free space is not available, make more free space */
24375f44c854SHong Zhang       if (current_space->local_remaining < nzk) {
24385f44c854SHong Zhang         i = am - k + 1;                                    /* num of unfactored rows */
2439f91af8c7SBarry Smith         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
24409566063dSJacob Faibussowitsch         PetscCall(PetscFreeSpaceGet(i, &current_space));
24419566063dSJacob Faibussowitsch         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
24425f44c854SHong Zhang         reallocs++;
24435f44c854SHong Zhang       }
24445f44c854SHong Zhang 
24455f44c854SHong Zhang       /* copy data into free_space and free_space_lvl, then initialize lnk */
244608401ef6SPierre Jolivet       PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
24479566063dSJacob Faibussowitsch       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
24485f44c854SHong Zhang 
24495f44c854SHong Zhang       /* add the k-th row into il and jl */
24505f44c854SHong Zhang       if (nzk > 1) {
24515f44c854SHong Zhang         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
24529371c9d4SSatish Balay         jl[k] = jl[i];
24539371c9d4SSatish Balay         jl[i] = k;
24545f44c854SHong Zhang         il[k] = ui[k] + 1;
24555f44c854SHong Zhang       }
24565f44c854SHong Zhang       uj_ptr[k]     = current_space->array;
24575f44c854SHong Zhang       uj_lvl_ptr[k] = current_space_lvl->array;
24585f44c854SHong Zhang 
24595f44c854SHong Zhang       current_space->array += nzk;
24605f44c854SHong Zhang       current_space->local_used += nzk;
24615f44c854SHong Zhang       current_space->local_remaining -= nzk;
24625f44c854SHong Zhang 
24635f44c854SHong Zhang       current_space_lvl->array += nzk;
24645f44c854SHong Zhang       current_space_lvl->local_used += nzk;
24655f44c854SHong Zhang       current_space_lvl->local_remaining -= nzk;
24665f44c854SHong Zhang 
24675f44c854SHong Zhang       ui[k + 1] = ui[k] + nzk;
24685f44c854SHong Zhang     }
24695f44c854SHong Zhang 
24709566063dSJacob Faibussowitsch     PetscCall(ISRestoreIndices(perm, &rip));
24719566063dSJacob Faibussowitsch     PetscCall(ISRestoreIndices(iperm, &riip));
24729566063dSJacob Faibussowitsch     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
24739566063dSJacob Faibussowitsch     PetscCall(PetscFree(ajtmp));
24745f44c854SHong Zhang 
24759263d837SHong Zhang     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
24769566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
24779566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor  */
24789566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
24799566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
24805f44c854SHong Zhang 
24815f44c854SHong Zhang   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
24825f44c854SHong Zhang 
24835f44c854SHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
24845f44c854SHong Zhang   b               = (Mat_SeqSBAIJ *)(fact)->data;
24855f44c854SHong Zhang   b->singlemalloc = PETSC_FALSE;
24862205254eSKarl Rupp 
24879566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
24882205254eSKarl Rupp 
24895f44c854SHong Zhang   b->j         = uj;
24905f44c854SHong Zhang   b->i         = ui;
24915f44c854SHong Zhang   b->diag      = udiag;
24927f53bb6cSHong Zhang   b->free_diag = PETSC_TRUE;
2493f4259b30SLisandro Dalcin   b->ilen      = NULL;
2494f4259b30SLisandro Dalcin   b->imax      = NULL;
24955f44c854SHong Zhang   b->row       = perm;
24965f44c854SHong Zhang   b->col       = perm;
24979566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
24989566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
24995f44c854SHong Zhang   b->icol          = iperm;
25005f44c854SHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
25012205254eSKarl Rupp 
25029566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
25039566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectMemory((PetscObject)fact, ui[am] * (sizeof(PetscInt) + sizeof(MatScalar))));
25042205254eSKarl Rupp 
25055f44c854SHong Zhang   b->maxnz = b->nz = ui[am];
25065f44c854SHong Zhang   b->free_a        = PETSC_TRUE;
25075f44c854SHong Zhang   b->free_ij       = PETSC_TRUE;
25085f44c854SHong Zhang 
2509f284f12aSHong Zhang   fact->info.factor_mallocs   = reallocs;
2510f284f12aSHong Zhang   fact->info.fill_ratio_given = fill;
25115f44c854SHong Zhang   if (ai[am] != 0) {
25126a69fef8SHong Zhang     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
251395b5ac22SHong Zhang     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
25145f44c854SHong Zhang   } else {
2515f284f12aSHong Zhang     fact->info.fill_ratio_needed = 0.0;
25165f44c854SHong Zhang   }
25179263d837SHong Zhang #if defined(PETSC_USE_INFO)
25189263d837SHong Zhang   if (ai[am] != 0) {
25199263d837SHong Zhang     PetscReal af = fact->info.fill_ratio_needed;
25209566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
25219566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
25229566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
25239263d837SHong Zhang   } else {
25249566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
25259263d837SHong Zhang   }
25269263d837SHong Zhang #endif
252735233d99SShri Abhyankar   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
25285f44c854SHong Zhang   PetscFunctionReturn(0);
25295f44c854SHong Zhang }
25305f44c854SHong Zhang 
25319371c9d4SSatish Balay PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) {
25325f44c854SHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
25335f44c854SHong Zhang   Mat_SeqSBAIJ      *b;
2534ace3abfcSBarry Smith   PetscBool          perm_identity, missing;
25355f44c854SHong Zhang   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
25365f44c854SHong Zhang   const PetscInt    *rip, *riip;
25375f44c854SHong Zhang   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
25380298fd71SBarry Smith   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
25395f44c854SHong Zhang   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
25405f44c854SHong Zhang   PetscReal          fill = info->fill, levels = info->levels;
25410298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
25420298fd71SBarry Smith   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
25435f44c854SHong Zhang   PetscBT            lnkbt;
25445f44c854SHong Zhang   IS                 iperm;
25455f44c854SHong Zhang 
25465f44c854SHong Zhang   PetscFunctionBegin;
254708401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
25489566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &d));
254928b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
25509566063dSJacob Faibussowitsch   PetscCall(ISIdentity(perm, &perm_identity));
25519566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
25525f44c854SHong Zhang 
25539566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &ui));
25549566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &udiag));
25555f44c854SHong Zhang   ui[0] = 0;
25565f44c854SHong Zhang 
25575f44c854SHong Zhang   /* ICC(0) without matrix ordering: simply copies fill pattern */
25585f44c854SHong Zhang   if (!levels && perm_identity) {
25595f44c854SHong Zhang     for (i = 0; i < am; i++) {
25605f44c854SHong Zhang       ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
25615f44c854SHong Zhang       udiag[i]  = ui[i];
2562ed59401aSHong Zhang     }
25639566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
256439e3d630SHong Zhang     cols = uj;
2565ed59401aSHong Zhang     for (i = 0; i < am; i++) {
2566ed59401aSHong Zhang       aj    = a->j + a->diag[i];
256739e3d630SHong Zhang       ncols = ui[i + 1] - ui[i];
256839e3d630SHong Zhang       for (j = 0; j < ncols; j++) *cols++ = *aj++;
2569ed59401aSHong Zhang     }
257039e3d630SHong Zhang   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
25719566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(iperm, &riip));
25729566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(perm, &rip));
2573b635d36bSHong Zhang 
25747a48dd6fSHong Zhang     /* initialization */
25759566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(am + 1, &ajtmp));
25767a48dd6fSHong Zhang 
25777a48dd6fSHong Zhang     /* jl: linked list for storing indices of the pivot rows
25787a48dd6fSHong Zhang        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
25799566063dSJacob Faibussowitsch     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
25807a48dd6fSHong Zhang     for (i = 0; i < am; i++) {
25819371c9d4SSatish Balay       jl[i] = am;
25829371c9d4SSatish Balay       il[i] = 0;
25837a48dd6fSHong Zhang     }
25847a48dd6fSHong Zhang 
25857a48dd6fSHong Zhang     /* create and initialize a linked list for storing column indices of the active row k */
25867a48dd6fSHong Zhang     nlnk = am + 1;
25879566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
25887a48dd6fSHong Zhang 
25897a48dd6fSHong Zhang     /* initial FreeSpace size is fill*(ai[am]+1) */
25909566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
25917a48dd6fSHong Zhang     current_space = free_space;
25929566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl));
25937a48dd6fSHong Zhang     current_space_lvl = free_space_lvl;
25947a48dd6fSHong Zhang 
25957a48dd6fSHong Zhang     for (k = 0; k < am; k++) { /* for each active row k */
25967a48dd6fSHong Zhang       /* initialize lnk by the column indices of row rip[k] of A */
25977a48dd6fSHong Zhang       nzk   = 0;
2598622e2028SHong Zhang       ncols = ai[rip[k] + 1] - ai[rip[k]];
259928b400f6SJacob Faibussowitsch       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2600622e2028SHong Zhang       ncols_upper = 0;
2601622e2028SHong Zhang       for (j = 0; j < ncols; j++) {
2602b635d36bSHong Zhang         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2603b635d36bSHong Zhang         if (riip[i] >= k) {         /* only take upper triangular entry */
26045a8e39fbSHong Zhang           ajtmp[ncols_upper] = i;
2605622e2028SHong Zhang           ncols_upper++;
2606622e2028SHong Zhang         }
2607622e2028SHong Zhang       }
26089566063dSJacob Faibussowitsch       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
26097a48dd6fSHong Zhang       nzk += nlnk;
26107a48dd6fSHong Zhang 
26117a48dd6fSHong Zhang       /* update lnk by computing fill-in for each pivot row to be merged in */
26127a48dd6fSHong Zhang       prow = jl[k]; /* 1st pivot row */
26137a48dd6fSHong Zhang 
26147a48dd6fSHong Zhang       while (prow < k) {
26157a48dd6fSHong Zhang         nextprow = jl[prow];
26167a48dd6fSHong Zhang 
26177a48dd6fSHong Zhang         /* merge prow into k-th row */
26187a48dd6fSHong Zhang         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
26197a48dd6fSHong Zhang         jmax  = ui[prow + 1];
26207a48dd6fSHong Zhang         ncols = jmax - jmin;
2621ed59401aSHong Zhang         i     = jmin - ui[prow];
2622ed59401aSHong Zhang         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
26235a8e39fbSHong Zhang         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
26245a8e39fbSHong Zhang         j     = *(uj - 1);
26259566063dSJacob Faibussowitsch         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
26267a48dd6fSHong Zhang         nzk += nlnk;
26277a48dd6fSHong Zhang 
26287a48dd6fSHong Zhang         /* update il and jl for prow */
26297a48dd6fSHong Zhang         if (jmin < jmax) {
26307a48dd6fSHong Zhang           il[prow] = jmin;
26319371c9d4SSatish Balay           j        = *cols;
26329371c9d4SSatish Balay           jl[prow] = jl[j];
26339371c9d4SSatish Balay           jl[j]    = prow;
26347a48dd6fSHong Zhang         }
26357a48dd6fSHong Zhang         prow = nextprow;
26367a48dd6fSHong Zhang       }
26377a48dd6fSHong Zhang 
26387a48dd6fSHong Zhang       /* if free space is not available, make more free space */
26397a48dd6fSHong Zhang       if (current_space->local_remaining < nzk) {
26407a48dd6fSHong Zhang         i = am - k + 1;                                    /* num of unfactored rows */
2641f91af8c7SBarry Smith         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
26429566063dSJacob Faibussowitsch         PetscCall(PetscFreeSpaceGet(i, &current_space));
26439566063dSJacob Faibussowitsch         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
26447a48dd6fSHong Zhang         reallocs++;
26457a48dd6fSHong Zhang       }
26467a48dd6fSHong Zhang 
26477a48dd6fSHong Zhang       /* copy data into free_space and free_space_lvl, then initialize lnk */
264828b400f6SJacob Faibussowitsch       PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
26499566063dSJacob Faibussowitsch       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
26507a48dd6fSHong Zhang 
26517a48dd6fSHong Zhang       /* add the k-th row into il and jl */
265239e3d630SHong Zhang       if (nzk > 1) {
26537a48dd6fSHong Zhang         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
26549371c9d4SSatish Balay         jl[k] = jl[i];
26559371c9d4SSatish Balay         jl[i] = k;
26567a48dd6fSHong Zhang         il[k] = ui[k] + 1;
26577a48dd6fSHong Zhang       }
26587a48dd6fSHong Zhang       uj_ptr[k]     = current_space->array;
26597a48dd6fSHong Zhang       uj_lvl_ptr[k] = current_space_lvl->array;
26607a48dd6fSHong Zhang 
26617a48dd6fSHong Zhang       current_space->array += nzk;
26627a48dd6fSHong Zhang       current_space->local_used += nzk;
26637a48dd6fSHong Zhang       current_space->local_remaining -= nzk;
26647a48dd6fSHong Zhang 
26657a48dd6fSHong Zhang       current_space_lvl->array += nzk;
26667a48dd6fSHong Zhang       current_space_lvl->local_used += nzk;
26677a48dd6fSHong Zhang       current_space_lvl->local_remaining -= nzk;
26687a48dd6fSHong Zhang 
26697a48dd6fSHong Zhang       ui[k + 1] = ui[k] + nzk;
26707a48dd6fSHong Zhang     }
26717a48dd6fSHong Zhang 
26726cf91177SBarry Smith #if defined(PETSC_USE_INFO)
26737a48dd6fSHong Zhang     if (ai[am] != 0) {
267439e3d630SHong Zhang       PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
26759566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
26769566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
26779566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
26787a48dd6fSHong Zhang     } else {
26799566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "Empty matrix\n"));
26807a48dd6fSHong Zhang     }
268163ba0a88SBarry Smith #endif
26827a48dd6fSHong Zhang 
26839566063dSJacob Faibussowitsch     PetscCall(ISRestoreIndices(perm, &rip));
26849566063dSJacob Faibussowitsch     PetscCall(ISRestoreIndices(iperm, &riip));
26859566063dSJacob Faibussowitsch     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
26869566063dSJacob Faibussowitsch     PetscCall(PetscFree(ajtmp));
26877a48dd6fSHong Zhang 
26887a48dd6fSHong Zhang     /* destroy list of free space and other temporary array(s) */
26899566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
26909566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
26919566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
26929566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
26937a48dd6fSHong Zhang 
269439e3d630SHong Zhang   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
269539e3d630SHong Zhang 
26967a48dd6fSHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
26977a48dd6fSHong Zhang 
2698f284f12aSHong Zhang   b               = (Mat_SeqSBAIJ *)fact->data;
26997a48dd6fSHong Zhang   b->singlemalloc = PETSC_FALSE;
27002205254eSKarl Rupp 
27019566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
27022205254eSKarl Rupp 
27037a48dd6fSHong Zhang   b->j         = uj;
27047a48dd6fSHong Zhang   b->i         = ui;
27055f44c854SHong Zhang   b->diag      = udiag;
27067f53bb6cSHong Zhang   b->free_diag = PETSC_TRUE;
2707f4259b30SLisandro Dalcin   b->ilen      = NULL;
2708f4259b30SLisandro Dalcin   b->imax      = NULL;
27097a48dd6fSHong Zhang   b->row       = perm;
2710b635d36bSHong Zhang   b->col       = perm;
27112205254eSKarl Rupp 
27129566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
27139566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
27142205254eSKarl Rupp 
2715b635d36bSHong Zhang   b->icol          = iperm;
27167a48dd6fSHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
27179566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
27189566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectMemory((PetscObject)fact, (ui[am] - am) * (sizeof(PetscInt) + sizeof(MatScalar))));
27197a48dd6fSHong Zhang   b->maxnz = b->nz = ui[am];
2720e6b907acSBarry Smith   b->free_a        = PETSC_TRUE;
2721e6b907acSBarry Smith   b->free_ij       = PETSC_TRUE;
27227a48dd6fSHong Zhang 
2723f284f12aSHong Zhang   fact->info.factor_mallocs   = reallocs;
2724f284f12aSHong Zhang   fact->info.fill_ratio_given = fill;
27257a48dd6fSHong Zhang   if (ai[am] != 0) {
2726f284f12aSHong Zhang     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
27277a48dd6fSHong Zhang   } else {
2728f284f12aSHong Zhang     fact->info.fill_ratio_needed = 0.0;
27297a48dd6fSHong Zhang   }
27300a3c351aSHong Zhang   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2731a6175056SHong Zhang   PetscFunctionReturn(0);
2732a6175056SHong Zhang }
2733d5d45c9bSBarry Smith 
27349371c9d4SSatish Balay PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) {
27350be760fbSHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
27360be760fbSHong Zhang   Mat_SeqSBAIJ      *b;
27379186b105SHong Zhang   PetscBool          perm_identity, missing;
27380be760fbSHong Zhang   PetscReal          fill = info->fill;
27390be760fbSHong Zhang   const PetscInt    *rip, *riip;
27400be760fbSHong Zhang   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
27410be760fbSHong Zhang   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
27420be760fbSHong Zhang   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
27430298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
27440be760fbSHong Zhang   PetscBT            lnkbt;
27450be760fbSHong Zhang   IS                 iperm;
27460be760fbSHong Zhang 
27470be760fbSHong Zhang   PetscFunctionBegin;
274808401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
27499566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
275028b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
27519186b105SHong Zhang 
27520be760fbSHong Zhang   /* check whether perm is the identity mapping */
27539566063dSJacob Faibussowitsch   PetscCall(ISIdentity(perm, &perm_identity));
27549566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
27559566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iperm, &riip));
27569566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(perm, &rip));
27570be760fbSHong Zhang 
27580be760fbSHong Zhang   /* initialization */
27599566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &ui));
27609566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &udiag));
27610be760fbSHong Zhang   ui[0] = 0;
27620be760fbSHong Zhang 
27630be760fbSHong Zhang   /* jl: linked list for storing indices of the pivot rows
27640be760fbSHong Zhang      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
27659566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
27660be760fbSHong Zhang   for (i = 0; i < am; i++) {
27679371c9d4SSatish Balay     jl[i] = am;
27689371c9d4SSatish Balay     il[i] = 0;
27690be760fbSHong Zhang   }
27700be760fbSHong Zhang 
27710be760fbSHong Zhang   /* create and initialize a linked list for storing column indices of the active row k */
27720be760fbSHong Zhang   nlnk = am + 1;
27739566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
27740be760fbSHong Zhang 
277595b5ac22SHong Zhang   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
27769566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
27770be760fbSHong Zhang   current_space = free_space;
27780be760fbSHong Zhang 
27790be760fbSHong Zhang   for (k = 0; k < am; k++) { /* for each active row k */
27800be760fbSHong Zhang     /* initialize lnk by the column indices of row rip[k] of A */
27810be760fbSHong Zhang     nzk   = 0;
27820be760fbSHong Zhang     ncols = ai[rip[k] + 1] - ai[rip[k]];
278328b400f6SJacob Faibussowitsch     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
27840be760fbSHong Zhang     ncols_upper = 0;
27850be760fbSHong Zhang     for (j = 0; j < ncols; j++) {
27860be760fbSHong Zhang       i = riip[*(aj + ai[rip[k]] + j)];
27870be760fbSHong Zhang       if (i >= k) { /* only take upper triangular entry */
27880be760fbSHong Zhang         cols[ncols_upper] = i;
27890be760fbSHong Zhang         ncols_upper++;
27900be760fbSHong Zhang       }
27910be760fbSHong Zhang     }
27929566063dSJacob Faibussowitsch     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
27930be760fbSHong Zhang     nzk += nlnk;
27940be760fbSHong Zhang 
27950be760fbSHong Zhang     /* update lnk by computing fill-in for each pivot row to be merged in */
27960be760fbSHong Zhang     prow = jl[k]; /* 1st pivot row */
27970be760fbSHong Zhang 
27980be760fbSHong Zhang     while (prow < k) {
27990be760fbSHong Zhang       nextprow = jl[prow];
28000be760fbSHong Zhang       /* merge prow into k-th row */
28010be760fbSHong Zhang       jmin     = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
28020be760fbSHong Zhang       jmax     = ui[prow + 1];
28030be760fbSHong Zhang       ncols    = jmax - jmin;
28040be760fbSHong Zhang       uj_ptr   = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
28059566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
28060be760fbSHong Zhang       nzk += nlnk;
28070be760fbSHong Zhang 
28080be760fbSHong Zhang       /* update il and jl for prow */
28090be760fbSHong Zhang       if (jmin < jmax) {
28100be760fbSHong Zhang         il[prow] = jmin;
28112205254eSKarl Rupp         j        = *uj_ptr;
28122205254eSKarl Rupp         jl[prow] = jl[j];
28132205254eSKarl Rupp         jl[j]    = prow;
28140be760fbSHong Zhang       }
28150be760fbSHong Zhang       prow = nextprow;
28160be760fbSHong Zhang     }
28170be760fbSHong Zhang 
28180be760fbSHong Zhang     /* if free space is not available, make more free space */
28190be760fbSHong Zhang     if (current_space->local_remaining < nzk) {
28200be760fbSHong Zhang       i = am - k + 1;                                    /* num of unfactored rows */
2821f91af8c7SBarry Smith       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
28229566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(i, &current_space));
28230be760fbSHong Zhang       reallocs++;
28240be760fbSHong Zhang     }
28250be760fbSHong Zhang 
28260be760fbSHong Zhang     /* copy data into free space, then initialize lnk */
28279566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
28280be760fbSHong Zhang 
28290be760fbSHong Zhang     /* add the k-th row into il and jl */
28307b056e98SHong Zhang     if (nzk > 1) {
28310be760fbSHong Zhang       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
28329371c9d4SSatish Balay       jl[k] = jl[i];
28339371c9d4SSatish Balay       jl[i] = k;
28340be760fbSHong Zhang       il[k] = ui[k] + 1;
28350be760fbSHong Zhang     }
28360be760fbSHong Zhang     ui_ptr[k] = current_space->array;
28372205254eSKarl Rupp 
28380be760fbSHong Zhang     current_space->array += nzk;
28390be760fbSHong Zhang     current_space->local_used += nzk;
28400be760fbSHong Zhang     current_space->local_remaining -= nzk;
28410be760fbSHong Zhang 
28420be760fbSHong Zhang     ui[k + 1] = ui[k] + nzk;
28430be760fbSHong Zhang   }
28440be760fbSHong Zhang 
28459566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(perm, &rip));
28469566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iperm, &riip));
28479566063dSJacob Faibussowitsch   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
28480be760fbSHong Zhang 
28499263d837SHong Zhang   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
28509566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
28519566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
28529566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
28530be760fbSHong Zhang 
28540be760fbSHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
28550be760fbSHong Zhang 
2856f284f12aSHong Zhang   b               = (Mat_SeqSBAIJ *)fact->data;
28570be760fbSHong Zhang   b->singlemalloc = PETSC_FALSE;
28580be760fbSHong Zhang   b->free_a       = PETSC_TRUE;
28590be760fbSHong Zhang   b->free_ij      = PETSC_TRUE;
28602205254eSKarl Rupp 
28619566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
28622205254eSKarl Rupp 
28630be760fbSHong Zhang   b->j         = uj;
28640be760fbSHong Zhang   b->i         = ui;
28650be760fbSHong Zhang   b->diag      = udiag;
28660be760fbSHong Zhang   b->free_diag = PETSC_TRUE;
2867f4259b30SLisandro Dalcin   b->ilen      = NULL;
2868f4259b30SLisandro Dalcin   b->imax      = NULL;
28690be760fbSHong Zhang   b->row       = perm;
28700be760fbSHong Zhang   b->col       = perm;
287126fbe8dcSKarl Rupp 
28729566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
28739566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
28742205254eSKarl Rupp 
28750be760fbSHong Zhang   b->icol          = iperm;
28760be760fbSHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
287726fbe8dcSKarl Rupp 
28789566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
28799566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectMemory((PetscObject)fact, ui[am] * (sizeof(PetscInt) + sizeof(MatScalar))));
28802205254eSKarl Rupp 
28810be760fbSHong Zhang   b->maxnz = b->nz = ui[am];
28820be760fbSHong Zhang 
2883f284f12aSHong Zhang   fact->info.factor_mallocs   = reallocs;
2884f284f12aSHong Zhang   fact->info.fill_ratio_given = fill;
28850be760fbSHong Zhang   if (ai[am] != 0) {
28866a69fef8SHong Zhang     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
288795b5ac22SHong Zhang     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
28880be760fbSHong Zhang   } else {
2889f284f12aSHong Zhang     fact->info.fill_ratio_needed = 0.0;
28900be760fbSHong Zhang   }
28919263d837SHong Zhang #if defined(PETSC_USE_INFO)
28929263d837SHong Zhang   if (ai[am] != 0) {
28939263d837SHong Zhang     PetscReal af = fact->info.fill_ratio_needed;
28949566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
28959566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
28969566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
28979263d837SHong Zhang   } else {
28989566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
28999263d837SHong Zhang   }
29009263d837SHong Zhang #endif
290135233d99SShri Abhyankar   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
29020be760fbSHong Zhang   PetscFunctionReturn(0);
29030be760fbSHong Zhang }
29040be760fbSHong Zhang 
29059371c9d4SSatish Balay PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) {
2906f76d2b81SHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
290710c27e3dSHong Zhang   Mat_SeqSBAIJ      *b;
29089186b105SHong Zhang   PetscBool          perm_identity, missing;
290910c27e3dSHong Zhang   PetscReal          fill = info->fill;
29105d0c19d7SBarry Smith   const PetscInt    *rip, *riip;
29115d0c19d7SBarry Smith   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
291210c27e3dSHong Zhang   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
29132ed133dbSHong Zhang   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
29140298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
291510c27e3dSHong Zhang   PetscBT            lnkbt;
29162ed133dbSHong Zhang   IS                 iperm;
2917f76d2b81SHong Zhang 
2918f76d2b81SHong Zhang   PetscFunctionBegin;
291908401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
29209566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
292128b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
29229186b105SHong Zhang 
29232ed133dbSHong Zhang   /* check whether perm is the identity mapping */
29249566063dSJacob Faibussowitsch   PetscCall(ISIdentity(perm, &perm_identity));
29259566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
29269566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iperm, &riip));
29279566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(perm, &rip));
292810c27e3dSHong Zhang 
292910c27e3dSHong Zhang   /* initialization */
29309566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &ui));
293110c27e3dSHong Zhang   ui[0] = 0;
293210c27e3dSHong Zhang 
293310c27e3dSHong Zhang   /* jl: linked list for storing indices of the pivot rows
29341393bc97SHong Zhang      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
29359566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
29361393bc97SHong Zhang   for (i = 0; i < am; i++) {
29379371c9d4SSatish Balay     jl[i] = am;
29389371c9d4SSatish Balay     il[i] = 0;
2939f76d2b81SHong Zhang   }
294010c27e3dSHong Zhang 
294110c27e3dSHong Zhang   /* create and initialize a linked list for storing column indices of the active row k */
29421393bc97SHong Zhang   nlnk = am + 1;
29439566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
294410c27e3dSHong Zhang 
29451393bc97SHong Zhang   /* initial FreeSpace size is fill*(ai[am]+1) */
29469566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
294710c27e3dSHong Zhang   current_space = free_space;
294810c27e3dSHong Zhang 
29491393bc97SHong Zhang   for (k = 0; k < am; k++) { /* for each active row k */
295010c27e3dSHong Zhang     /* initialize lnk by the column indices of row rip[k] of A */
295110c27e3dSHong Zhang     nzk   = 0;
29522ed133dbSHong Zhang     ncols = ai[rip[k] + 1] - ai[rip[k]];
295394bad497SJacob Faibussowitsch     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
29542ed133dbSHong Zhang     ncols_upper = 0;
29552ed133dbSHong Zhang     for (j = 0; j < ncols; j++) {
29569bfd6278SHong Zhang       i = riip[*(aj + ai[rip[k]] + j)];
29572ed133dbSHong Zhang       if (i >= k) { /* only take upper triangular entry */
29582ed133dbSHong Zhang         cols[ncols_upper] = i;
29592ed133dbSHong Zhang         ncols_upper++;
29602ed133dbSHong Zhang       }
29612ed133dbSHong Zhang     }
29629566063dSJacob Faibussowitsch     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
296310c27e3dSHong Zhang     nzk += nlnk;
296410c27e3dSHong Zhang 
296510c27e3dSHong Zhang     /* update lnk by computing fill-in for each pivot row to be merged in */
296610c27e3dSHong Zhang     prow = jl[k]; /* 1st pivot row */
296710c27e3dSHong Zhang 
296810c27e3dSHong Zhang     while (prow < k) {
296910c27e3dSHong Zhang       nextprow = jl[prow];
297010c27e3dSHong Zhang       /* merge prow into k-th row */
29711393bc97SHong Zhang       jmin     = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
297210c27e3dSHong Zhang       jmax     = ui[prow + 1];
297310c27e3dSHong Zhang       ncols    = jmax - jmin;
29741393bc97SHong Zhang       uj_ptr   = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
29759566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
297610c27e3dSHong Zhang       nzk += nlnk;
297710c27e3dSHong Zhang 
297810c27e3dSHong Zhang       /* update il and jl for prow */
297910c27e3dSHong Zhang       if (jmin < jmax) {
298010c27e3dSHong Zhang         il[prow] = jmin;
29819371c9d4SSatish Balay         j        = *uj_ptr;
29829371c9d4SSatish Balay         jl[prow] = jl[j];
29839371c9d4SSatish Balay         jl[j]    = prow;
298410c27e3dSHong Zhang       }
298510c27e3dSHong Zhang       prow = nextprow;
298610c27e3dSHong Zhang     }
298710c27e3dSHong Zhang 
298810c27e3dSHong Zhang     /* if free space is not available, make more free space */
298910c27e3dSHong Zhang     if (current_space->local_remaining < nzk) {
29901393bc97SHong Zhang       i = am - k + 1;                     /* num of unfactored rows */
299110c27e3dSHong Zhang       i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
29929566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(i, &current_space));
299310c27e3dSHong Zhang       reallocs++;
299410c27e3dSHong Zhang     }
299510c27e3dSHong Zhang 
299610c27e3dSHong Zhang     /* copy data into free space, then initialize lnk */
29979566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
299810c27e3dSHong Zhang 
299910c27e3dSHong Zhang     /* add the k-th row into il and jl */
300010c27e3dSHong Zhang     if (nzk - 1 > 0) {
30011393bc97SHong Zhang       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
30029371c9d4SSatish Balay       jl[k] = jl[i];
30039371c9d4SSatish Balay       jl[i] = k;
300410c27e3dSHong Zhang       il[k] = ui[k] + 1;
300510c27e3dSHong Zhang     }
30062ed133dbSHong Zhang     ui_ptr[k] = current_space->array;
30072205254eSKarl Rupp 
300810c27e3dSHong Zhang     current_space->array += nzk;
300910c27e3dSHong Zhang     current_space->local_used += nzk;
301010c27e3dSHong Zhang     current_space->local_remaining -= nzk;
301110c27e3dSHong Zhang 
301210c27e3dSHong Zhang     ui[k + 1] = ui[k] + nzk;
301310c27e3dSHong Zhang   }
301410c27e3dSHong Zhang 
30156cf91177SBarry Smith #if defined(PETSC_USE_INFO)
30161393bc97SHong Zhang   if (ai[am] != 0) {
30171393bc97SHong Zhang     PetscReal af = (PetscReal)(ui[am]) / ((PetscReal)ai[am]);
30189566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
30199566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
30209566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
302110c27e3dSHong Zhang   } else {
30229566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
302310c27e3dSHong Zhang   }
302463ba0a88SBarry Smith #endif
302510c27e3dSHong Zhang 
30269566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(perm, &rip));
30279566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iperm, &riip));
30289566063dSJacob Faibussowitsch   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
302910c27e3dSHong Zhang 
303010c27e3dSHong Zhang   /* destroy list of free space and other temporary array(s) */
30319566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
30329566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
30339566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
303410c27e3dSHong Zhang 
303510c27e3dSHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
303610c27e3dSHong Zhang 
3037f284f12aSHong Zhang   b               = (Mat_SeqSBAIJ *)fact->data;
303810c27e3dSHong Zhang   b->singlemalloc = PETSC_FALSE;
3039e6b907acSBarry Smith   b->free_a       = PETSC_TRUE;
3040e6b907acSBarry Smith   b->free_ij      = PETSC_TRUE;
30412205254eSKarl Rupp 
30429566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
30432205254eSKarl Rupp 
304410c27e3dSHong Zhang   b->j    = uj;
304510c27e3dSHong Zhang   b->i    = ui;
3046f4259b30SLisandro Dalcin   b->diag = NULL;
3047f4259b30SLisandro Dalcin   b->ilen = NULL;
3048f4259b30SLisandro Dalcin   b->imax = NULL;
304910c27e3dSHong Zhang   b->row  = perm;
30509bfd6278SHong Zhang   b->col  = perm;
30512205254eSKarl Rupp 
30529566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
30539566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
30542205254eSKarl Rupp 
30559bfd6278SHong Zhang   b->icol          = iperm;
305610c27e3dSHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
30572205254eSKarl Rupp 
30589566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
30599566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectMemory((PetscObject)fact, (ui[am] - am) * (sizeof(PetscInt) + sizeof(MatScalar))));
30601393bc97SHong Zhang   b->maxnz = b->nz = ui[am];
306110c27e3dSHong Zhang 
3062f284f12aSHong Zhang   fact->info.factor_mallocs   = reallocs;
3063f284f12aSHong Zhang   fact->info.fill_ratio_given = fill;
30641393bc97SHong Zhang   if (ai[am] != 0) {
3065f284f12aSHong Zhang     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
306610c27e3dSHong Zhang   } else {
3067f284f12aSHong Zhang     fact->info.fill_ratio_needed = 0.0;
306810c27e3dSHong Zhang   }
30690a3c351aSHong Zhang   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3070f76d2b81SHong Zhang   PetscFunctionReturn(0);
3071f76d2b81SHong Zhang }
3072599ef60dSHong Zhang 
30739371c9d4SSatish Balay PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx) {
3074813a1f2bSShri Abhyankar   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
3075813a1f2bSShri Abhyankar   PetscInt           n  = A->rmap->n;
3076813a1f2bSShri Abhyankar   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3077813a1f2bSShri Abhyankar   PetscScalar       *x, sum;
3078813a1f2bSShri Abhyankar   const PetscScalar *b;
3079813a1f2bSShri Abhyankar   const MatScalar   *aa = a->a, *v;
3080813a1f2bSShri Abhyankar   PetscInt           i, nz;
3081813a1f2bSShri Abhyankar 
3082813a1f2bSShri Abhyankar   PetscFunctionBegin;
3083813a1f2bSShri Abhyankar   if (!n) PetscFunctionReturn(0);
3084813a1f2bSShri Abhyankar 
30859566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
30869566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
3087813a1f2bSShri Abhyankar 
3088813a1f2bSShri Abhyankar   /* forward solve the lower triangular */
3089813a1f2bSShri Abhyankar   x[0] = b[0];
3090813a1f2bSShri Abhyankar   v    = aa;
3091813a1f2bSShri Abhyankar   vi   = aj;
3092813a1f2bSShri Abhyankar   for (i = 1; i < n; i++) {
3093813a1f2bSShri Abhyankar     nz  = ai[i + 1] - ai[i];
3094813a1f2bSShri Abhyankar     sum = b[i];
3095813a1f2bSShri Abhyankar     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3096813a1f2bSShri Abhyankar     v += nz;
3097813a1f2bSShri Abhyankar     vi += nz;
3098813a1f2bSShri Abhyankar     x[i] = sum;
3099813a1f2bSShri Abhyankar   }
3100813a1f2bSShri Abhyankar 
3101813a1f2bSShri Abhyankar   /* backward solve the upper triangular */
310262fbd6afSShri Abhyankar   for (i = n - 1; i >= 0; i--) {
31033c0119dfSShri Abhyankar     v   = aa + adiag[i + 1] + 1;
31043c0119dfSShri Abhyankar     vi  = aj + adiag[i + 1] + 1;
3105813a1f2bSShri Abhyankar     nz  = adiag[i] - adiag[i + 1] - 1;
3106813a1f2bSShri Abhyankar     sum = x[i];
3107813a1f2bSShri Abhyankar     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
31083c0119dfSShri Abhyankar     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3109813a1f2bSShri Abhyankar   }
3110813a1f2bSShri Abhyankar 
31119566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
31129566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
31139566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
3114813a1f2bSShri Abhyankar   PetscFunctionReturn(0);
3115813a1f2bSShri Abhyankar }
3116813a1f2bSShri Abhyankar 
31179371c9d4SSatish Balay PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx) {
3118f268cba8SShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
3119f268cba8SShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
3120f268cba8SShri Abhyankar   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3121f268cba8SShri Abhyankar   const PetscInt    *rout, *cout, *r, *c;
3122f268cba8SShri Abhyankar   PetscScalar       *x, *tmp, sum;
3123f268cba8SShri Abhyankar   const PetscScalar *b;
3124f268cba8SShri Abhyankar   const MatScalar   *aa = a->a, *v;
3125f268cba8SShri Abhyankar 
3126f268cba8SShri Abhyankar   PetscFunctionBegin;
3127f268cba8SShri Abhyankar   if (!n) PetscFunctionReturn(0);
3128f268cba8SShri Abhyankar 
31299566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
31309566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
3131f268cba8SShri Abhyankar   tmp = a->solve_work;
3132f268cba8SShri Abhyankar 
31339371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
31349371c9d4SSatish Balay   r = rout;
31359371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
31369371c9d4SSatish Balay   c = cout;
3137f268cba8SShri Abhyankar 
3138f268cba8SShri Abhyankar   /* forward solve the lower triangular */
3139f268cba8SShri Abhyankar   tmp[0] = b[r[0]];
3140f268cba8SShri Abhyankar   v      = aa;
3141f268cba8SShri Abhyankar   vi     = aj;
3142f268cba8SShri Abhyankar   for (i = 1; i < n; i++) {
3143f268cba8SShri Abhyankar     nz  = ai[i + 1] - ai[i];
3144f268cba8SShri Abhyankar     sum = b[r[i]];
3145f268cba8SShri Abhyankar     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3146f268cba8SShri Abhyankar     tmp[i] = sum;
31479371c9d4SSatish Balay     v += nz;
31489371c9d4SSatish Balay     vi += nz;
3149f268cba8SShri Abhyankar   }
3150f268cba8SShri Abhyankar 
3151f268cba8SShri Abhyankar   /* backward solve the upper triangular */
3152f268cba8SShri Abhyankar   for (i = n - 1; i >= 0; i--) {
31533c0119dfSShri Abhyankar     v   = aa + adiag[i + 1] + 1;
31543c0119dfSShri Abhyankar     vi  = aj + adiag[i + 1] + 1;
3155f268cba8SShri Abhyankar     nz  = adiag[i] - adiag[i + 1] - 1;
3156f268cba8SShri Abhyankar     sum = tmp[i];
3157f268cba8SShri Abhyankar     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3158f268cba8SShri Abhyankar     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3159f268cba8SShri Abhyankar   }
3160f268cba8SShri Abhyankar 
31619566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
31629566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
31639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
31649566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
31659566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3166f268cba8SShri Abhyankar   PetscFunctionReturn(0);
3167f268cba8SShri Abhyankar }
3168f268cba8SShri Abhyankar 
3169fe97e370SBarry Smith /*
31706aad120cSJose E. Roman     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3171fe97e370SBarry Smith */
31729371c9d4SSatish Balay PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact) {
31731d098868SHong Zhang   Mat             B = *fact;
3174599ef60dSHong Zhang   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b;
3175599ef60dSHong Zhang   IS              isicol;
31761d098868SHong Zhang   const PetscInt *r, *ic;
31771d098868SHong Zhang   PetscInt        i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3178dc3a2fd3SHong Zhang   PetscInt       *bi, *bj, *bdiag, *bdiag_rev;
3179f61a948aSLisandro Dalcin   PetscInt        row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
31801d098868SHong Zhang   PetscInt        nlnk, *lnk;
31811d098868SHong Zhang   PetscBT         lnkbt;
3182ace3abfcSBarry Smith   PetscBool       row_identity, icol_identity;
31838aee2decSHong Zhang   MatScalar      *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
31841d098868SHong Zhang   const PetscInt *ics;
31851d098868SHong Zhang   PetscInt        j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3186a2ea699eSBarry Smith   PetscReal       dt = info->dt, shift = info->shiftamount;
3187f61a948aSLisandro Dalcin   PetscInt        dtcount = (PetscInt)info->dtcount, nnz_max;
3188ace3abfcSBarry Smith   PetscBool       missing;
3189599ef60dSHong Zhang 
3190599ef60dSHong Zhang   PetscFunctionBegin;
3191f61a948aSLisandro Dalcin   if (dt == PETSC_DEFAULT) dt = 0.005;
3192f61a948aSLisandro Dalcin   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);
3193f61a948aSLisandro Dalcin 
31941d098868SHong Zhang   /* ------- symbolic factorization, can be reused ---------*/
31959566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
319628b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
31971d098868SHong Zhang   adiag = a->diag;
3198599ef60dSHong Zhang 
31999566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
3200599ef60dSHong Zhang 
3201599ef60dSHong Zhang   /* bdiag is location of diagonal in factor */
32029566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));     /* becomes b->diag */
32039566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */
3204599ef60dSHong Zhang 
32051d098868SHong Zhang   /* allocate row pointers bi */
32069566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * n + 2, &bi));
32071d098868SHong Zhang 
32081d098868SHong Zhang   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3209393d3a68SHong Zhang   if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
32101d098868SHong Zhang   nnz_max = ai[n] + 2 * n * dtcount + 2;
32118fc3a347SHong Zhang 
32129566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nnz_max + 1, &bj));
32139566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nnz_max + 1, &ba));
3214599ef60dSHong Zhang 
3215599ef60dSHong Zhang   /* put together the new matrix */
32169566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
32179566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)isicol));
3218f284f12aSHong Zhang   b = (Mat_SeqAIJ *)B->data;
32192205254eSKarl Rupp 
3220599ef60dSHong Zhang   b->free_a       = PETSC_TRUE;
3221599ef60dSHong Zhang   b->free_ij      = PETSC_TRUE;
3222599ef60dSHong Zhang   b->singlemalloc = PETSC_FALSE;
32232205254eSKarl Rupp 
3224599ef60dSHong Zhang   b->a    = ba;
3225599ef60dSHong Zhang   b->j    = bj;
3226599ef60dSHong Zhang   b->i    = bi;
3227599ef60dSHong Zhang   b->diag = bdiag;
3228f4259b30SLisandro Dalcin   b->ilen = NULL;
3229f4259b30SLisandro Dalcin   b->imax = NULL;
3230599ef60dSHong Zhang   b->row  = isrow;
3231599ef60dSHong Zhang   b->col  = iscol;
32329566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
32339566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
3234599ef60dSHong Zhang   b->icol = isicol;
3235599ef60dSHong Zhang 
32369566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
32379566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectMemory((PetscObject)B, nnz_max * (sizeof(PetscInt) + sizeof(MatScalar))));
32381d098868SHong Zhang   b->maxnz = nnz_max;
3239599ef60dSHong Zhang 
3240d5f3da31SBarry Smith   B->factortype            = MAT_FACTOR_ILUDT;
3241f284f12aSHong Zhang   B->info.factor_mallocs   = 0;
3242f284f12aSHong Zhang   B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
32431d098868SHong Zhang   /* ------- end of symbolic factorization ---------*/
3244599ef60dSHong Zhang 
32459566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
32469566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
3247599ef60dSHong Zhang   ics = ic;
3248599ef60dSHong Zhang 
3249599ef60dSHong Zhang   /* linked list for storing column indices of the active row */
3250599ef60dSHong Zhang   nlnk = n + 1;
32519566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
32521d098868SHong Zhang 
32531d098868SHong Zhang   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
32549566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n, &im, n, &jtmp));
32551d098868SHong Zhang   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
32569566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp));
32579566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(rtmp, n));
3258599ef60dSHong Zhang 
3259599ef60dSHong Zhang   bi[0]         = 0;
32608fc3a347SHong Zhang   bdiag[0]      = nnz_max - 1; /* location of diag[0] in factor B */
3261dc3a2fd3SHong Zhang   bdiag_rev[n]  = bdiag[0];
32628fc3a347SHong Zhang   bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3263599ef60dSHong Zhang   for (i = 0; i < n; i++) {
3264599ef60dSHong Zhang     /* copy initial fill into linked list */
32658369b28dSHong Zhang     nzi = ai[r[i] + 1] - ai[r[i]];
326694bad497SJacob Faibussowitsch     PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
32678369b28dSHong Zhang     nzi_al = adiag[r[i]] - ai[r[i]];
32688369b28dSHong Zhang     nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3269599ef60dSHong Zhang     ajtmp  = aj + ai[r[i]];
32709566063dSJacob Faibussowitsch     PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt));
3271599ef60dSHong Zhang 
3272599ef60dSHong Zhang     /* load in initial (unfactored row) */
32731d098868SHong Zhang     aatmp = a->a + ai[r[i]];
32749371c9d4SSatish Balay     for (j = 0; j < nzi; j++) { rtmp[ics[*ajtmp++]] = *aatmp++; }
3275599ef60dSHong Zhang 
3276599ef60dSHong Zhang     /* add pivot rows into linked list */
3277599ef60dSHong Zhang     row = lnk[n];
3278599ef60dSHong Zhang     while (row < i) {
32791d098868SHong Zhang       nzi_bl = bi[row + 1] - bi[row] + 1;
32801d098868SHong Zhang       bjtmp  = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
32819566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im));
3282599ef60dSHong Zhang       nzi += nlnk;
3283599ef60dSHong Zhang       row = lnk[row];
3284599ef60dSHong Zhang     }
3285599ef60dSHong Zhang 
32868369b28dSHong Zhang     /* copy data from lnk into jtmp, then initialize lnk */
32879566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt));
3288599ef60dSHong Zhang 
3289599ef60dSHong Zhang     /* numerical factorization */
32908369b28dSHong Zhang     bjtmp = jtmp;
3291599ef60dSHong Zhang     row   = *bjtmp++; /* 1st pivot row */
3292599ef60dSHong Zhang     while (row < i) {
3293599ef60dSHong Zhang       pc         = rtmp + row;
32943c2a7987SHong Zhang       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
32953c2a7987SHong Zhang       multiplier = (*pc) * (*pv);
3296599ef60dSHong Zhang       *pc        = multiplier;
32973c2a7987SHong Zhang       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
32981d098868SHong Zhang         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
32991d098868SHong Zhang         pv = ba + bdiag[row + 1] + 1;
33001d098868SHong Zhang         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
33011d098868SHong Zhang         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
33029566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3303599ef60dSHong Zhang       }
3304599ef60dSHong Zhang       row = *bjtmp++;
3305599ef60dSHong Zhang     }
3306599ef60dSHong Zhang 
33078369b28dSHong Zhang     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
33088aee2decSHong Zhang     diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
33099371c9d4SSatish Balay     nzi_bl   = 0;
33109371c9d4SSatish Balay     j        = 0;
33118369b28dSHong Zhang     while (jtmp[j] < i) { /* Note: jtmp is sorted */
33129371c9d4SSatish Balay       vtmp[j]       = rtmp[jtmp[j]];
33139371c9d4SSatish Balay       rtmp[jtmp[j]] = 0.0;
33149371c9d4SSatish Balay       nzi_bl++;
33159371c9d4SSatish Balay       j++;
33168369b28dSHong Zhang     }
33178369b28dSHong Zhang     nzi_bu = nzi - nzi_bl - 1;
33188369b28dSHong Zhang     while (j < nzi) {
33199371c9d4SSatish Balay       vtmp[j]       = rtmp[jtmp[j]];
33209371c9d4SSatish Balay       rtmp[jtmp[j]] = 0.0;
33218369b28dSHong Zhang       j++;
33228369b28dSHong Zhang     }
33238369b28dSHong Zhang 
3324599ef60dSHong Zhang     bjtmp = bj + bi[i];
3325599ef60dSHong Zhang     batmp = ba + bi[i];
33268369b28dSHong Zhang     /* apply level dropping rule to L part */
33278369b28dSHong Zhang     ncut  = nzi_al + dtcount;
33288369b28dSHong Zhang     if (ncut < nzi_bl) {
33299566063dSJacob Faibussowitsch       PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp));
33309566063dSJacob Faibussowitsch       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp));
3331599ef60dSHong Zhang     } else {
33328369b28dSHong Zhang       ncut = nzi_bl;
33338369b28dSHong Zhang     }
33348369b28dSHong Zhang     for (j = 0; j < ncut; j++) {
33358369b28dSHong Zhang       bjtmp[j] = jtmp[j];
33368369b28dSHong Zhang       batmp[j] = vtmp[j];
33378369b28dSHong Zhang     }
33381d098868SHong Zhang     bi[i + 1] = bi[i] + ncut;
33398369b28dSHong Zhang     nzi       = ncut + 1;
33408369b28dSHong Zhang 
33418369b28dSHong Zhang     /* apply level dropping rule to U part */
33428369b28dSHong Zhang     ncut = nzi_au + dtcount;
33438369b28dSHong Zhang     if (ncut < nzi_bu) {
33449566063dSJacob Faibussowitsch       PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1));
33459566063dSJacob Faibussowitsch       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1));
33468369b28dSHong Zhang     } else {
33478369b28dSHong Zhang       ncut = nzi_bu;
33488369b28dSHong Zhang     }
33498369b28dSHong Zhang     nzi += ncut;
33501d098868SHong Zhang 
33511d098868SHong Zhang     /* mark bdiagonal */
33521d098868SHong Zhang     bdiag[i + 1]         = bdiag[i] - (ncut + 1);
3353dc3a2fd3SHong Zhang     bdiag_rev[n - i - 1] = bdiag[i + 1];
33548fc3a347SHong Zhang     bi[2 * n - i]        = bi[2 * n - i + 1] - (ncut + 1);
33551d098868SHong Zhang     bjtmp                = bj + bdiag[i];
33561d098868SHong Zhang     batmp                = ba + bdiag[i];
33571d098868SHong Zhang     *bjtmp               = i;
33588aee2decSHong Zhang     *batmp               = diag_tmp; /* rtmp[i]; */
33599371c9d4SSatish Balay     if (*batmp == 0.0) { *batmp = dt + shift; }
3360a5b23f4aSJose E. Roman     *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */
33611d098868SHong Zhang 
33621d098868SHong Zhang     bjtmp = bj + bdiag[i + 1] + 1;
33631d098868SHong Zhang     batmp = ba + bdiag[i + 1] + 1;
33648369b28dSHong Zhang     for (k = 0; k < ncut; k++) {
33651d098868SHong Zhang       bjtmp[k] = jtmp[nzi_bl + 1 + k];
33661d098868SHong Zhang       batmp[k] = vtmp[nzi_bl + 1 + k];
3367599ef60dSHong Zhang     }
3368599ef60dSHong Zhang 
3369599ef60dSHong Zhang     im[i] = nzi; /* used by PetscLLAddSortedLU() */
33708369b28dSHong Zhang   }              /* for (i=0; i<n; i++) */
337163a3b9bcSJacob Faibussowitsch   PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]);
3372599ef60dSHong Zhang 
33739566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
33749566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
3375599ef60dSHong Zhang 
33769566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
33779566063dSJacob Faibussowitsch   PetscCall(PetscFree2(im, jtmp));
33789566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rtmp, vtmp));
33799566063dSJacob Faibussowitsch   PetscCall(PetscFree(bdiag_rev));
3380599ef60dSHong Zhang 
33819566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(B->cmap->n));
33821d098868SHong Zhang   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3383599ef60dSHong Zhang 
33849566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
33859566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isicol, &icol_identity));
3386599ef60dSHong Zhang   if (row_identity && icol_identity) {
338735233d99SShri Abhyankar     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3388599ef60dSHong Zhang   } else {
338935233d99SShri Abhyankar     B->ops->solve = MatSolve_SeqAIJ;
3390599ef60dSHong Zhang   }
3391599ef60dSHong Zhang 
3392f4259b30SLisandro Dalcin   B->ops->solveadd          = NULL;
3393f4259b30SLisandro Dalcin   B->ops->solvetranspose    = NULL;
3394f4259b30SLisandro Dalcin   B->ops->solvetransposeadd = NULL;
3395f4259b30SLisandro Dalcin   B->ops->matsolve          = NULL;
3396599ef60dSHong Zhang   B->assembled              = PETSC_TRUE;
3397599ef60dSHong Zhang   B->preallocated           = PETSC_TRUE;
3398599ef60dSHong Zhang   PetscFunctionReturn(0);
3399599ef60dSHong Zhang }
3400599ef60dSHong Zhang 
34013c2a7987SHong Zhang /* a wraper of MatILUDTFactor_SeqAIJ() */
3402fe97e370SBarry Smith /*
34036aad120cSJose E. Roman     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3404fe97e370SBarry Smith */
3405fe97e370SBarry Smith 
34069371c9d4SSatish Balay PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) {
34073c2a7987SHong Zhang   PetscFunctionBegin;
34089566063dSJacob Faibussowitsch   PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact));
34093c2a7987SHong Zhang   PetscFunctionReturn(0);
34103c2a7987SHong Zhang }
34113c2a7987SHong Zhang 
34123c2a7987SHong Zhang /*
34133c2a7987SHong Zhang    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
34143c2a7987SHong Zhang    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
34153c2a7987SHong Zhang */
3416fe97e370SBarry Smith /*
34176aad120cSJose E. Roman     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3418fe97e370SBarry Smith */
3419fe97e370SBarry Smith 
34209371c9d4SSatish Balay PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info) {
34213c2a7987SHong Zhang   Mat             C = fact;
34223c2a7987SHong Zhang   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
34233c2a7987SHong Zhang   IS              isrow = b->row, isicol = b->icol;
34243c2a7987SHong Zhang   const PetscInt *r, *ic, *ics;
3425b78a477dSHong Zhang   PetscInt        i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3426b78a477dSHong Zhang   PetscInt       *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
34273c2a7987SHong Zhang   MatScalar      *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3428182b8fbaSHong Zhang   PetscReal       dt = info->dt, shift = info->shiftamount;
3429ace3abfcSBarry Smith   PetscBool       row_identity, col_identity;
34303c2a7987SHong Zhang 
34313c2a7987SHong Zhang   PetscFunctionBegin;
34329566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
34339566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
34349566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &rtmp));
34353c2a7987SHong Zhang   ics = ic;
34363c2a7987SHong Zhang 
34373c2a7987SHong Zhang   for (i = 0; i < n; i++) {
3438b78a477dSHong Zhang     /* initialize rtmp array */
3439b78a477dSHong Zhang     nzl   = bi[i + 1] - bi[i]; /* num of nozeros in L(i,:) */
34403c2a7987SHong Zhang     bjtmp = bj + bi[i];
3441b78a477dSHong Zhang     for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3442b78a477dSHong Zhang     rtmp[i] = 0.0;
3443b78a477dSHong Zhang     nzu     = bdiag[i] - bdiag[i + 1]; /* num of nozeros in U(i,:) */
3444b78a477dSHong Zhang     bjtmp   = bj + bdiag[i + 1] + 1;
3445b78a477dSHong Zhang     for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;
34463c2a7987SHong Zhang 
3447b78a477dSHong Zhang     /* load in initial unfactored row of A */
34483c2a7987SHong Zhang     nz    = ai[r[i] + 1] - ai[r[i]];
34493c2a7987SHong Zhang     ajtmp = aj + ai[r[i]];
34503c2a7987SHong Zhang     v     = aa + ai[r[i]];
34519371c9d4SSatish Balay     for (j = 0; j < nz; j++) { rtmp[ics[*ajtmp++]] = v[j]; }
34523c2a7987SHong Zhang 
3453b78a477dSHong Zhang     /* numerical factorization */
3454b78a477dSHong Zhang     bjtmp = bj + bi[i];        /* point to 1st entry of L(i,:) */
3455b78a477dSHong Zhang     nzl   = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3456b78a477dSHong Zhang     k     = 0;
3457b78a477dSHong Zhang     while (k < nzl) {
34583c2a7987SHong Zhang       row        = *bjtmp++;
34593c2a7987SHong Zhang       pc         = rtmp + row;
3460b78a477dSHong Zhang       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3461b78a477dSHong Zhang       multiplier = (*pc) * (*pv);
34623c2a7987SHong Zhang       *pc        = multiplier;
3463b78a477dSHong Zhang       if (PetscAbsScalar(multiplier) > dt) {
3464b78a477dSHong Zhang         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3465b78a477dSHong Zhang         pv = b->a + bdiag[row + 1] + 1;
3466b78a477dSHong Zhang         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3467b78a477dSHong Zhang         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
34689566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(1 + 2.0 * nz));
34693c2a7987SHong Zhang       }
3470b78a477dSHong Zhang       k++;
34713c2a7987SHong Zhang     }
34723c2a7987SHong Zhang 
3473b78a477dSHong Zhang     /* finished row so stick it into b->a */
3474b78a477dSHong Zhang     /* L-part */
3475b78a477dSHong Zhang     pv  = b->a + bi[i];
3476b78a477dSHong Zhang     pj  = bj + bi[i];
3477b78a477dSHong Zhang     nzl = bi[i + 1] - bi[i];
34789371c9d4SSatish Balay     for (j = 0; j < nzl; j++) { pv[j] = rtmp[pj[j]]; }
3479b78a477dSHong Zhang 
3480a5b23f4aSJose E. Roman     /* diagonal: invert diagonal entries for simpler triangular solves */
3481b78a477dSHong Zhang     if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3482b78a477dSHong Zhang     b->a[bdiag[i]] = 1.0 / rtmp[i];
3483b78a477dSHong Zhang 
3484b78a477dSHong Zhang     /* U-part */
3485b78a477dSHong Zhang     pv  = b->a + bdiag[i + 1] + 1;
3486b78a477dSHong Zhang     pj  = bj + bdiag[i + 1] + 1;
3487b78a477dSHong Zhang     nzu = bdiag[i] - bdiag[i + 1] - 1;
34889371c9d4SSatish Balay     for (j = 0; j < nzu; j++) { pv[j] = rtmp[pj[j]]; }
3489b78a477dSHong Zhang   }
3490b78a477dSHong Zhang 
34919566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
34929566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
34939566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
3494b78a477dSHong Zhang 
34959566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
34969566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isicol, &col_identity));
34973c2a7987SHong Zhang   if (row_identity && col_identity) {
349835233d99SShri Abhyankar     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
34993c2a7987SHong Zhang   } else {
350035233d99SShri Abhyankar     C->ops->solve = MatSolve_SeqAIJ;
35013c2a7987SHong Zhang   }
3502f4259b30SLisandro Dalcin   C->ops->solveadd          = NULL;
3503f4259b30SLisandro Dalcin   C->ops->solvetranspose    = NULL;
3504f4259b30SLisandro Dalcin   C->ops->solvetransposeadd = NULL;
3505f4259b30SLisandro Dalcin   C->ops->matsolve          = NULL;
35063c2a7987SHong Zhang   C->assembled              = PETSC_TRUE;
35073c2a7987SHong Zhang   C->preallocated           = PETSC_TRUE;
35082205254eSKarl Rupp 
35099566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->cmap->n));
35103c2a7987SHong Zhang   PetscFunctionReturn(0);
35113c2a7987SHong Zhang }
3512