xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 4dfa11a44d5adf2389f1d3acbc8f3c1116dc6c3a)
1b377110cSBarry Smith 
2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>
3c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h>
4c6db04a5SJed Brown #include <petscbt.h>
5c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
63b2fbd54SBarry Smith 
78fa24674SBarry Smith /*
88fa24674SBarry Smith       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
9312e372aSBarry Smith 
10312e372aSBarry Smith       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
118fa24674SBarry Smith */
129371c9d4SSatish Balay PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol) {
138fa24674SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)mat->data;
14889a8dedSBarry Smith   PetscInt           i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
158fa24674SBarry Smith   const PetscInt    *ai = a->i, *aj = a->j;
168fa24674SBarry Smith   const PetscScalar *aa = a->a;
17ace3abfcSBarry Smith   PetscBool         *done;
18889a8dedSBarry Smith   PetscReal          best, past = 0, future;
193a40ed3dSBarry Smith 
208fa24674SBarry Smith   PetscFunctionBegin;
218fa24674SBarry Smith   /* pick initial row */
228fa24674SBarry Smith   best = -1;
238fa24674SBarry Smith   for (i = 0; i < n; i++) {
2475567043SBarry Smith     future = 0.0;
258fa24674SBarry Smith     for (j = ai[i]; j < ai[i + 1]; j++) {
26db4deed7SKarl Rupp       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
27db4deed7SKarl Rupp       else past = PetscAbsScalar(aa[j]);
288fa24674SBarry Smith     }
298fa24674SBarry Smith     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
308fa24674SBarry Smith     if (past / future > best) {
318fa24674SBarry Smith       best    = past / future;
328fa24674SBarry Smith       current = i;
338fa24674SBarry Smith     }
348fa24674SBarry Smith   }
358fa24674SBarry Smith 
369566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &done));
379566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(done, n));
389566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &order));
398fa24674SBarry Smith   order[0] = current;
408fa24674SBarry Smith   for (i = 0; i < n - 1; i++) {
418fa24674SBarry Smith     done[current] = PETSC_TRUE;
428fa24674SBarry Smith     best          = -1;
438fa24674SBarry Smith     /* loop over all neighbors of current pivot */
448fa24674SBarry Smith     for (j = ai[current]; j < ai[current + 1]; j++) {
458fa24674SBarry Smith       jj = aj[j];
468fa24674SBarry Smith       if (done[jj]) continue;
478fa24674SBarry Smith       /* loop over columns of potential next row computing weights for below and above diagonal */
488fa24674SBarry Smith       past = future = 0.0;
498fa24674SBarry Smith       for (k = ai[jj]; k < ai[jj + 1]; k++) {
508fa24674SBarry Smith         kk = aj[k];
518fa24674SBarry Smith         if (done[kk]) past += PetscAbsScalar(aa[k]);
528fa24674SBarry Smith         else if (kk != jj) future += PetscAbsScalar(aa[k]);
538fa24674SBarry Smith       }
548fa24674SBarry Smith       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
558fa24674SBarry Smith       if (past / future > best) {
568fa24674SBarry Smith         best       = past / future;
578fa24674SBarry Smith         newcurrent = jj;
588fa24674SBarry Smith       }
598fa24674SBarry Smith     }
608fa24674SBarry Smith     if (best == -1) { /* no neighbors to select from so select best of all that remain */
618fa24674SBarry Smith       best = -1;
628fa24674SBarry Smith       for (k = 0; k < n; k++) {
638fa24674SBarry Smith         if (done[k]) continue;
6475567043SBarry Smith         future = 0.0;
6575567043SBarry Smith         past   = 0.0;
668fa24674SBarry Smith         for (j = ai[k]; j < ai[k + 1]; j++) {
678fa24674SBarry Smith           kk = aj[j];
688fa24674SBarry Smith           if (done[kk]) past += PetscAbsScalar(aa[j]);
698fa24674SBarry Smith           else if (kk != k) future += PetscAbsScalar(aa[j]);
708fa24674SBarry Smith         }
718fa24674SBarry Smith         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
728fa24674SBarry Smith         if (past / future > best) {
738fa24674SBarry Smith           best       = past / future;
748fa24674SBarry Smith           newcurrent = k;
758fa24674SBarry Smith         }
768fa24674SBarry Smith       }
778fa24674SBarry Smith     }
7808401ef6SPierre Jolivet     PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current");
798fa24674SBarry Smith     current      = newcurrent;
808fa24674SBarry Smith     order[i + 1] = current;
818fa24674SBarry Smith   }
829566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow));
838fa24674SBarry Smith   *icol = *irow;
849566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)*irow));
859566063dSJacob Faibussowitsch   PetscCall(PetscFree(done));
869566063dSJacob Faibussowitsch   PetscCall(PetscFree(order));
87d64ed03dSBarry Smith   PetscFunctionReturn(0);
883b2fbd54SBarry Smith }
893b2fbd54SBarry Smith 
909371c9d4SSatish Balay static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type) {
914ac6704cSBarry Smith   PetscFunctionBegin;
924ac6704cSBarry Smith   *type = MATSOLVERPETSC;
934ac6704cSBarry Smith   PetscFunctionReturn(0);
944ac6704cSBarry Smith }
954ac6704cSBarry Smith 
969371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B) {
97d0f46423SBarry Smith   PetscInt n = A->rmap->n;
98b24902e0SBarry Smith 
99b24902e0SBarry Smith   PetscFunctionBegin;
100891bceaeSHong Zhang #if defined(PETSC_USE_COMPLEX)
101b94d7dedSBarry Smith   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || (ftype != MAT_FACTOR_CHOLESKY && ftype != MAT_FACTOR_ICC), PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY or ICC Factor is not supported");
102891bceaeSHong Zhang #endif
1039566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
1049566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*B, n, n, n, n));
105599ef60dSHong Zhang   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
1069566063dSJacob Faibussowitsch     PetscCall(MatSetType(*B, MATSEQAIJ));
1072205254eSKarl Rupp 
10835233d99SShri Abhyankar     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
10935233d99SShri Abhyankar     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
1102205254eSKarl Rupp 
1119566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizesFromMats(*B, A, A));
1129566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
1139566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
1149566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
115b24902e0SBarry Smith   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
1169566063dSJacob Faibussowitsch     PetscCall(MatSetType(*B, MATSEQSBAIJ));
1179566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
1182205254eSKarl Rupp 
11935233d99SShri Abhyankar     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
12035233d99SShri Abhyankar     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
1219566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
1229566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
123e32f2f54SBarry Smith   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
124d5f3da31SBarry Smith   (*B)->factortype = ftype;
12500c67f3bSHong Zhang 
1269566063dSJacob Faibussowitsch   PetscCall(PetscFree((*B)->solvertype));
1279566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
128f73b0415SBarry Smith   (*B)->canuseordering = PETSC_TRUE;
1299566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
130b24902e0SBarry Smith   PetscFunctionReturn(0);
131b24902e0SBarry Smith }
132b24902e0SBarry Smith 
1339371c9d4SSatish Balay PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) {
134416022c9SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
135289bc588SBarry Smith   IS                 isicol;
1365d0c19d7SBarry Smith   const PetscInt    *r, *ic;
1375d0c19d7SBarry Smith   PetscInt           i, n = A->rmap->n, *ai = a->i, *aj = a->j;
1381393bc97SHong Zhang   PetscInt          *bi, *bj, *ajtmp;
1391393bc97SHong Zhang   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
1409e046878SBarry Smith   PetscReal          f;
1414875ba38SHong Zhang   PetscInt           nlnk, *lnk, k, **bi_ptr;
1420298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1431393bc97SHong Zhang   PetscBT            lnkbt;
144ece542f9SHong Zhang   PetscBool          missing;
145289bc588SBarry Smith 
1463a40ed3dSBarry Smith   PetscFunctionBegin;
14708401ef6SPierre Jolivet   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
1489566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
14928b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
150ece542f9SHong Zhang 
1519566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1529566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
1539566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
154289bc588SBarry Smith 
155289bc588SBarry Smith   /* get new row pointers */
1569566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bi));
1571393bc97SHong Zhang   bi[0] = 0;
1581393bc97SHong Zhang 
1591393bc97SHong Zhang   /* bdiag is location of diagonal in factor */
1609566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));
1611393bc97SHong Zhang   bdiag[0] = 0;
1621393bc97SHong Zhang 
1631393bc97SHong Zhang   /* linked list for storing column indices of the active row */
1641393bc97SHong Zhang   nlnk = n + 1;
1659566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
1661393bc97SHong Zhang 
1679566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
1681393bc97SHong Zhang 
1691393bc97SHong Zhang   /* initial FreeSpace size is f*(ai[n]+1) */
170d3d32019SBarry Smith   f = info->fill;
171aa91b3e7SRichard Tran Mills   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
1729566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1731393bc97SHong Zhang   current_space = free_space;
174289bc588SBarry Smith 
175289bc588SBarry Smith   for (i = 0; i < n; i++) {
1761393bc97SHong Zhang     /* copy previous fill into linked list */
177289bc588SBarry Smith     nzi   = 0;
1781393bc97SHong Zhang     nnz   = ai[r[i] + 1] - ai[r[i]];
1791393bc97SHong Zhang     ajtmp = aj + ai[r[i]];
1809566063dSJacob Faibussowitsch     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
1811393bc97SHong Zhang     nzi += nlnk;
1821393bc97SHong Zhang 
1831393bc97SHong Zhang     /* add pivot rows into linked list */
1841393bc97SHong Zhang     row = lnk[n];
1851393bc97SHong Zhang     while (row < i) {
186d1170560SHong Zhang       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
187d1170560SHong Zhang       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
1889566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
1891393bc97SHong Zhang       nzi += nlnk;
1901393bc97SHong Zhang       row = lnk[row];
191289bc588SBarry Smith     }
1921393bc97SHong Zhang     bi[i + 1] = bi[i] + nzi;
1931393bc97SHong Zhang     im[i]     = nzi;
1941393bc97SHong Zhang 
1951393bc97SHong Zhang     /* mark bdiag */
1961393bc97SHong Zhang     nzbd = 0;
1971393bc97SHong Zhang     nnz  = nzi;
1981393bc97SHong Zhang     k    = lnk[n];
1991393bc97SHong Zhang     while (nnz-- && k < i) {
2001393bc97SHong Zhang       nzbd++;
2011393bc97SHong Zhang       k = lnk[k];
2021393bc97SHong Zhang     }
2031393bc97SHong Zhang     bdiag[i] = bi[i] + nzbd;
2041393bc97SHong Zhang 
2051393bc97SHong Zhang     /* if free space is not available, make more free space */
2061393bc97SHong Zhang     if (current_space->local_remaining < nzi) {
207f91af8c7SBarry Smith       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
2089566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
2091393bc97SHong Zhang       reallocs++;
2101393bc97SHong Zhang     }
2111393bc97SHong Zhang 
2121393bc97SHong Zhang     /* copy data into free space, then initialize lnk */
2139566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
2142205254eSKarl Rupp 
2151393bc97SHong Zhang     bi_ptr[i] = current_space->array;
2161393bc97SHong Zhang     current_space->array += nzi;
2171393bc97SHong Zhang     current_space->local_used += nzi;
2181393bc97SHong Zhang     current_space->local_remaining -= nzi;
219289bc588SBarry Smith   }
2206cf91177SBarry Smith #if defined(PETSC_USE_INFO)
221464e8d44SSatish Balay   if (ai[n] != 0) {
2221393bc97SHong Zhang     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2239566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
2249566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2259566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
2269566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "for best performance.\n"));
22705bf559cSSatish Balay   } else {
2289566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
22905bf559cSSatish Balay   }
23063ba0a88SBarry Smith #endif
2312fb3ed76SBarry Smith 
2329566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
2339566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
2341987afe7SBarry Smith 
2351393bc97SHong Zhang   /* destroy list of free space and other temporary array(s) */
2369566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
2379566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
2389566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
2399566063dSJacob Faibussowitsch   PetscCall(PetscFree2(bi_ptr, im));
240289bc588SBarry Smith 
241289bc588SBarry Smith   /* put together the new matrix */
2429566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
243719d5645SBarry Smith   b = (Mat_SeqAIJ *)(B)->data;
2442205254eSKarl Rupp 
245e6b907acSBarry Smith   b->free_a       = PETSC_TRUE;
246e6b907acSBarry Smith   b->free_ij      = PETSC_TRUE;
2477c922b88SBarry Smith   b->singlemalloc = PETSC_FALSE;
2482205254eSKarl Rupp 
2499566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[n] + 1, &b->a));
2501393bc97SHong Zhang   b->j    = bj;
2511393bc97SHong Zhang   b->i    = bi;
2521393bc97SHong Zhang   b->diag = bdiag;
253f4259b30SLisandro Dalcin   b->ilen = NULL;
254f4259b30SLisandro Dalcin   b->imax = NULL;
255416022c9SBarry Smith   b->row  = isrow;
256416022c9SBarry Smith   b->col  = iscol;
2579566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
2589566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
25982bf6240SBarry Smith   b->icol = isicol;
2609566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
2611393bc97SHong Zhang 
2621393bc97SHong Zhang   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
2631393bc97SHong Zhang   b->maxnz = b->nz = bi[n];
2647fd98bd6SLois Curfman McInnes 
265d5f3da31SBarry Smith   (B)->factortype            = MAT_FACTOR_LU;
266719d5645SBarry Smith   (B)->info.factor_mallocs   = reallocs;
267719d5645SBarry Smith   (B)->info.fill_ratio_given = f;
268703deb49SSatish Balay 
2699f7d0b68SHong Zhang   if (ai[n]) {
270719d5645SBarry Smith     (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
271eea2913aSSatish Balay   } else {
272719d5645SBarry Smith     (B)->info.fill_ratio_needed = 0.0;
273eea2913aSSatish Balay   }
274ad04f41aSHong Zhang   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
275ad540459SPierre Jolivet   if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2763a40ed3dSBarry Smith   PetscFunctionReturn(0);
277289bc588SBarry Smith }
2781393bc97SHong Zhang 
2799371c9d4SSatish Balay PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) {
280ce3d78c0SShri Abhyankar   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
281ce3d78c0SShri Abhyankar   IS                 isicol;
2828758e1faSBarry Smith   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
2838758e1faSBarry Smith   PetscInt           i, n = A->rmap->n;
2848758e1faSBarry Smith   PetscInt          *bi, *bj;
285ce3d78c0SShri Abhyankar   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
286ce3d78c0SShri Abhyankar   PetscReal          f;
287ce3d78c0SShri Abhyankar   PetscInt           nlnk, *lnk, k, **bi_ptr;
2880298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
289ce3d78c0SShri Abhyankar   PetscBT            lnkbt;
290ece542f9SHong Zhang   PetscBool          missing;
291ce3d78c0SShri Abhyankar 
292ce3d78c0SShri Abhyankar   PetscFunctionBegin;
29308401ef6SPierre Jolivet   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
2949566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
29528b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
296ece542f9SHong Zhang 
2979566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
2989566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
2999566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
300ce3d78c0SShri Abhyankar 
3011bfa06eaSJed Brown   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
3029566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bi));
3039566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));
304a6df321fSShri Abhyankar   bi[0] = bdiag[0] = 0;
3059b48462bSShri Abhyankar 
3069b48462bSShri Abhyankar   /* linked list for storing column indices of the active row */
3079b48462bSShri Abhyankar   nlnk = n + 1;
3089566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
3099b48462bSShri Abhyankar 
3109566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
3119b48462bSShri Abhyankar 
3129b48462bSShri Abhyankar   /* initial FreeSpace size is f*(ai[n]+1) */
3139b48462bSShri Abhyankar   f = info->fill;
314aa91b3e7SRichard Tran Mills   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
3159566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
3169b48462bSShri Abhyankar   current_space = free_space;
3179b48462bSShri Abhyankar 
3189b48462bSShri Abhyankar   for (i = 0; i < n; i++) {
3199b48462bSShri Abhyankar     /* copy previous fill into linked list */
3209b48462bSShri Abhyankar     nzi   = 0;
3219b48462bSShri Abhyankar     nnz   = ai[r[i] + 1] - ai[r[i]];
3229b48462bSShri Abhyankar     ajtmp = aj + ai[r[i]];
3239566063dSJacob Faibussowitsch     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
3249b48462bSShri Abhyankar     nzi += nlnk;
3259b48462bSShri Abhyankar 
3269b48462bSShri Abhyankar     /* add pivot rows into linked list */
3279b48462bSShri Abhyankar     row = lnk[n];
3289b48462bSShri Abhyankar     while (row < i) {
3299b48462bSShri Abhyankar       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
3309b48462bSShri Abhyankar       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
3319566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
3329b48462bSShri Abhyankar       nzi += nlnk;
3339b48462bSShri Abhyankar       row = lnk[row];
3349b48462bSShri Abhyankar     }
3359b48462bSShri Abhyankar     bi[i + 1] = bi[i] + nzi;
3369b48462bSShri Abhyankar     im[i]     = nzi;
3379b48462bSShri Abhyankar 
3389b48462bSShri Abhyankar     /* mark bdiag */
3399b48462bSShri Abhyankar     nzbd = 0;
3409b48462bSShri Abhyankar     nnz  = nzi;
3419b48462bSShri Abhyankar     k    = lnk[n];
3429b48462bSShri Abhyankar     while (nnz-- && k < i) {
3439b48462bSShri Abhyankar       nzbd++;
3449b48462bSShri Abhyankar       k = lnk[k];
3459b48462bSShri Abhyankar     }
3469b48462bSShri Abhyankar     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
3479b48462bSShri Abhyankar 
3489b48462bSShri Abhyankar     /* if free space is not available, make more free space */
3499b48462bSShri Abhyankar     if (current_space->local_remaining < nzi) {
3502f18eb33SBarry Smith       /* estimated additional space needed */
3511df4278eSBarry Smith       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
3529566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
3539b48462bSShri Abhyankar       reallocs++;
3549b48462bSShri Abhyankar     }
3559b48462bSShri Abhyankar 
3569b48462bSShri Abhyankar     /* copy data into free space, then initialize lnk */
3579566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
3582205254eSKarl Rupp 
3599b48462bSShri Abhyankar     bi_ptr[i] = current_space->array;
3609b48462bSShri Abhyankar     current_space->array += nzi;
3619b48462bSShri Abhyankar     current_space->local_used += nzi;
3629b48462bSShri Abhyankar     current_space->local_remaining -= nzi;
3639b48462bSShri Abhyankar   }
3649b48462bSShri Abhyankar 
3659566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
3669566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
3679b48462bSShri Abhyankar 
3689263d837SHong Zhang   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
3699566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
3709566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
3719566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
3729566063dSJacob Faibussowitsch   PetscCall(PetscFree2(bi_ptr, im));
3739b48462bSShri Abhyankar 
3749b48462bSShri Abhyankar   /* put together the new matrix */
3759566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3769b48462bSShri Abhyankar   b = (Mat_SeqAIJ *)(B)->data;
3772205254eSKarl Rupp 
3789b48462bSShri Abhyankar   b->free_a       = PETSC_TRUE;
3799b48462bSShri Abhyankar   b->free_ij      = PETSC_TRUE;
3809b48462bSShri Abhyankar   b->singlemalloc = PETSC_FALSE;
3812205254eSKarl Rupp 
3829566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
3832205254eSKarl Rupp 
3849b48462bSShri Abhyankar   b->j    = bj;
3859b48462bSShri Abhyankar   b->i    = bi;
3869b48462bSShri Abhyankar   b->diag = bdiag;
387f4259b30SLisandro Dalcin   b->ilen = NULL;
388f4259b30SLisandro Dalcin   b->imax = NULL;
3899b48462bSShri Abhyankar   b->row  = isrow;
3909b48462bSShri Abhyankar   b->col  = iscol;
3919566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
3929566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
3939b48462bSShri Abhyankar   b->icol = isicol;
3949566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
3959b48462bSShri Abhyankar 
3969b48462bSShri Abhyankar   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
3979b48462bSShri Abhyankar   b->maxnz = b->nz = bdiag[0] + 1;
3982205254eSKarl Rupp 
399d5f3da31SBarry Smith   B->factortype            = MAT_FACTOR_LU;
400f284f12aSHong Zhang   B->info.factor_mallocs   = reallocs;
401f284f12aSHong Zhang   B->info.fill_ratio_given = f;
4029b48462bSShri Abhyankar 
4039b48462bSShri Abhyankar   if (ai[n]) {
404f284f12aSHong Zhang     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
4059b48462bSShri Abhyankar   } else {
406f284f12aSHong Zhang     B->info.fill_ratio_needed = 0.0;
4079b48462bSShri Abhyankar   }
4089263d837SHong Zhang #if defined(PETSC_USE_INFO)
4099263d837SHong Zhang   if (ai[n] != 0) {
4109263d837SHong Zhang     PetscReal af = B->info.fill_ratio_needed;
4119566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
4129566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
4139566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
4149566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "for best performance.\n"));
4159263d837SHong Zhang   } else {
4169566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
4179263d837SHong Zhang   }
4189263d837SHong Zhang #endif
41935233d99SShri Abhyankar   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
420ad540459SPierre Jolivet   if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
4219566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
4229b48462bSShri Abhyankar   PetscFunctionReturn(0);
4239b48462bSShri Abhyankar }
4249b48462bSShri Abhyankar 
4255b5bc046SBarry Smith /*
4265b5bc046SBarry Smith     Trouble in factorization, should we dump the original matrix?
4275b5bc046SBarry Smith */
4289371c9d4SSatish Balay PetscErrorCode MatFactorDumpMatrix(Mat A) {
429ace3abfcSBarry Smith   PetscBool flg = PETSC_FALSE;
4305b5bc046SBarry Smith 
4315b5bc046SBarry Smith   PetscFunctionBegin;
4329566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
4335b5bc046SBarry Smith   if (flg) {
4345b5bc046SBarry Smith     PetscViewer viewer;
4355b5bc046SBarry Smith     char        filename[PETSC_MAX_PATH_LEN];
4365b5bc046SBarry Smith 
4379566063dSJacob Faibussowitsch     PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
4389566063dSJacob Faibussowitsch     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
4399566063dSJacob Faibussowitsch     PetscCall(MatView(A, viewer));
4409566063dSJacob Faibussowitsch     PetscCall(PetscViewerDestroy(&viewer));
4415b5bc046SBarry Smith   }
4425b5bc046SBarry Smith   PetscFunctionReturn(0);
4435b5bc046SBarry Smith }
4445b5bc046SBarry Smith 
4459371c9d4SSatish Balay PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) {
446c9c72f8fSHong Zhang   Mat              C = B;
447c9c72f8fSHong Zhang   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
448c9c72f8fSHong Zhang   IS               isrow = b->row, isicol = b->icol;
449c9c72f8fSHong Zhang   const PetscInt  *r, *ic, *ics;
450bbd65245SShri Abhyankar   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
451bbd65245SShri Abhyankar   PetscInt         i, j, k, nz, nzL, row, *pj;
452bbd65245SShri Abhyankar   const PetscInt  *ajtmp, *bjtmp;
453bbd65245SShri Abhyankar   MatScalar       *rtmp, *pc, multiplier, *pv;
454bbd65245SShri Abhyankar   const MatScalar *aa = a->a, *v;
455ace3abfcSBarry Smith   PetscBool        row_identity, col_identity;
456d90ac83dSHong Zhang   FactorShiftCtx   sctx;
4574f81c4b7SBarry Smith   const PetscInt  *ddiag;
458d4ad06f7SHong Zhang   PetscReal        rs;
459d4ad06f7SHong Zhang   MatScalar        d;
460d4ad06f7SHong Zhang 
461c9c72f8fSHong Zhang   PetscFunctionBegin;
462d6e5152cSHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
4639566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
464d4ad06f7SHong Zhang 
465f4db908eSBarry Smith   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
466d4ad06f7SHong Zhang     ddiag          = a->diag;
467d4ad06f7SHong Zhang     sctx.shift_top = info->zeropivot;
468d4ad06f7SHong Zhang     for (i = 0; i < n; i++) {
469d4ad06f7SHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
470d4ad06f7SHong Zhang       d  = (aa)[ddiag[i]];
471d4ad06f7SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
472d4ad06f7SHong Zhang       v  = aa + ai[i];
473d4ad06f7SHong Zhang       nz = ai[i + 1] - ai[i];
4742205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
475d4ad06f7SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
476d4ad06f7SHong Zhang     }
477d4ad06f7SHong Zhang     sctx.shift_top *= 1.1;
478d4ad06f7SHong Zhang     sctx.nshift_max = 5;
479d4ad06f7SHong Zhang     sctx.shift_lo   = 0.;
480d4ad06f7SHong Zhang     sctx.shift_hi   = 1.;
481d4ad06f7SHong Zhang   }
482d4ad06f7SHong Zhang 
4839566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
4849566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
4859566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &rtmp));
486c9c72f8fSHong Zhang   ics = ic;
487c9c72f8fSHong Zhang 
488d4ad06f7SHong Zhang   do {
48907b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
490c9c72f8fSHong Zhang     for (i = 0; i < n; i++) {
491c9c72f8fSHong Zhang       /* zero rtmp */
492c9c72f8fSHong Zhang       /* L part */
493c9c72f8fSHong Zhang       nz    = bi[i + 1] - bi[i];
494c9c72f8fSHong Zhang       bjtmp = bj + bi[i];
495c9c72f8fSHong Zhang       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
496c9c72f8fSHong Zhang 
497c9c72f8fSHong Zhang       /* U part */
49862fbd6afSShri Abhyankar       nz    = bdiag[i] - bdiag[i + 1];
49962fbd6afSShri Abhyankar       bjtmp = bj + bdiag[i + 1] + 1;
500813a1f2bSShri Abhyankar       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
501813a1f2bSShri Abhyankar 
502813a1f2bSShri Abhyankar       /* load in initial (unfactored row) */
503813a1f2bSShri Abhyankar       nz    = ai[r[i] + 1] - ai[r[i]];
504813a1f2bSShri Abhyankar       ajtmp = aj + ai[r[i]];
505813a1f2bSShri Abhyankar       v     = aa + ai[r[i]];
506ad540459SPierre Jolivet       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
507813a1f2bSShri Abhyankar       /* ZeropivotApply() */
50898a93161SHong Zhang       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
509813a1f2bSShri Abhyankar 
510813a1f2bSShri Abhyankar       /* elimination */
511813a1f2bSShri Abhyankar       bjtmp = bj + bi[i];
512813a1f2bSShri Abhyankar       row   = *bjtmp++;
513813a1f2bSShri Abhyankar       nzL   = bi[i + 1] - bi[i];
514f268cba8SShri Abhyankar       for (k = 0; k < nzL; k++) {
515813a1f2bSShri Abhyankar         pc = rtmp + row;
516813a1f2bSShri Abhyankar         if (*pc != 0.0) {
517813a1f2bSShri Abhyankar           pv         = b->a + bdiag[row];
518813a1f2bSShri Abhyankar           multiplier = *pc * (*pv);
519813a1f2bSShri Abhyankar           *pc        = multiplier;
5202205254eSKarl Rupp 
52162fbd6afSShri Abhyankar           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
52262fbd6afSShri Abhyankar           pv = b->a + bdiag[row + 1] + 1;
52362fbd6afSShri Abhyankar           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
5242205254eSKarl Rupp 
525813a1f2bSShri Abhyankar           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
5269566063dSJacob Faibussowitsch           PetscCall(PetscLogFlops(1 + 2.0 * nz));
527813a1f2bSShri Abhyankar         }
528f268cba8SShri Abhyankar         row = *bjtmp++;
529813a1f2bSShri Abhyankar       }
530813a1f2bSShri Abhyankar 
531813a1f2bSShri Abhyankar       /* finished row so stick it into b->a */
532813a1f2bSShri Abhyankar       rs = 0.0;
533813a1f2bSShri Abhyankar       /* L part */
534813a1f2bSShri Abhyankar       pv = b->a + bi[i];
535813a1f2bSShri Abhyankar       pj = b->j + bi[i];
536813a1f2bSShri Abhyankar       nz = bi[i + 1] - bi[i];
537813a1f2bSShri Abhyankar       for (j = 0; j < nz; j++) {
5389371c9d4SSatish Balay         pv[j] = rtmp[pj[j]];
5399371c9d4SSatish Balay         rs += PetscAbsScalar(pv[j]);
540813a1f2bSShri Abhyankar       }
541813a1f2bSShri Abhyankar 
542813a1f2bSShri Abhyankar       /* U part */
54362fbd6afSShri Abhyankar       pv = b->a + bdiag[i + 1] + 1;
54462fbd6afSShri Abhyankar       pj = b->j + bdiag[i + 1] + 1;
54562fbd6afSShri Abhyankar       nz = bdiag[i] - bdiag[i + 1] - 1;
546813a1f2bSShri Abhyankar       for (j = 0; j < nz; j++) {
5479371c9d4SSatish Balay         pv[j] = rtmp[pj[j]];
5489371c9d4SSatish Balay         rs += PetscAbsScalar(pv[j]);
549813a1f2bSShri Abhyankar       }
550813a1f2bSShri Abhyankar 
551813a1f2bSShri Abhyankar       sctx.rs = rs;
552813a1f2bSShri Abhyankar       sctx.pv = rtmp[i];
5539566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
55407b50cabSHong Zhang       if (sctx.newshift) break; /* break for-loop */
55507b50cabSHong Zhang       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
556813a1f2bSShri Abhyankar 
557a5b23f4aSJose E. Roman       /* Mark diagonal and invert diagonal for simpler triangular solves */
558813a1f2bSShri Abhyankar       pv  = b->a + bdiag[i];
559813a1f2bSShri Abhyankar       *pv = 1.0 / rtmp[i];
560813a1f2bSShri Abhyankar 
561813a1f2bSShri Abhyankar     } /* endof for (i=0; i<n; i++) { */
562813a1f2bSShri Abhyankar 
5638ff23777SHong Zhang     /* MatPivotRefine() */
56407b50cabSHong Zhang     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
565813a1f2bSShri Abhyankar       /*
566813a1f2bSShri Abhyankar        * if no shift in this attempt & shifting & started shifting & can refine,
567813a1f2bSShri Abhyankar        * then try lower shift
568813a1f2bSShri Abhyankar        */
569813a1f2bSShri Abhyankar       sctx.shift_hi       = sctx.shift_fraction;
570813a1f2bSShri Abhyankar       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
571813a1f2bSShri Abhyankar       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
57207b50cabSHong Zhang       sctx.newshift       = PETSC_TRUE;
573813a1f2bSShri Abhyankar       sctx.nshift++;
574813a1f2bSShri Abhyankar     }
57507b50cabSHong Zhang   } while (sctx.newshift);
576813a1f2bSShri Abhyankar 
5779566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
5789566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
5799566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
580d3ac4fa3SBarry Smith 
5819566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
5829566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isicol, &col_identity));
583abb87a52SBarry Smith   if (b->inode.size) {
584abb87a52SBarry Smith     C->ops->solve = MatSolve_SeqAIJ_Inode;
585abb87a52SBarry Smith   } else if (row_identity && col_identity) {
58635233d99SShri Abhyankar     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
587813a1f2bSShri Abhyankar   } else {
58835233d99SShri Abhyankar     C->ops->solve = MatSolve_SeqAIJ;
589813a1f2bSShri Abhyankar   }
59035233d99SShri Abhyankar   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
59135233d99SShri Abhyankar   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
59235233d99SShri Abhyankar   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
59335233d99SShri Abhyankar   C->ops->matsolve          = MatMatSolve_SeqAIJ;
594813a1f2bSShri Abhyankar   C->assembled              = PETSC_TRUE;
595813a1f2bSShri Abhyankar   C->preallocated           = PETSC_TRUE;
5962205254eSKarl Rupp 
5979566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->cmap->n));
598813a1f2bSShri Abhyankar 
59914d2772eSHong Zhang   /* MatShiftView(A,info,&sctx) */
600813a1f2bSShri Abhyankar   if (sctx.nshift) {
601f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
6029566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
603f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
6049566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
605f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
6069566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
607813a1f2bSShri Abhyankar     }
608813a1f2bSShri Abhyankar   }
609813a1f2bSShri Abhyankar   PetscFunctionReturn(0);
610813a1f2bSShri Abhyankar }
611813a1f2bSShri Abhyankar 
6129371c9d4SSatish Balay PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) {
613719d5645SBarry Smith   Mat              C = B;
614416022c9SBarry Smith   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
61582bf6240SBarry Smith   IS               isrow = b->row, isicol = b->icol;
6165d0c19d7SBarry Smith   const PetscInt  *r, *ic, *ics;
617d278edc7SBarry Smith   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
618d278edc7SBarry Smith   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
619d278edc7SBarry Smith   const PetscInt  *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
620d278edc7SBarry Smith   MatScalar       *pv, *rtmp, *pc, multiplier, d;
621d278edc7SBarry Smith   const MatScalar *v, *aa = a->a;
62233f9893dSHong Zhang   PetscReal        rs = 0.0;
623d90ac83dSHong Zhang   FactorShiftCtx   sctx;
6244f81c4b7SBarry Smith   const PetscInt  *ddiag;
625ace3abfcSBarry Smith   PetscBool        row_identity, col_identity;
626289bc588SBarry Smith 
6273a40ed3dSBarry Smith   PetscFunctionBegin;
628504a3fadSHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
6299566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
630ada7143aSSatish Balay 
631f4db908eSBarry Smith   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
63242898933SHong Zhang     ddiag          = a->diag;
6339f7d0b68SHong Zhang     sctx.shift_top = info->zeropivot;
6346cc28720Svictorle     for (i = 0; i < n; i++) {
6359f95998fSHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
6369f7d0b68SHong Zhang       d  = (aa)[ddiag[i]];
6371a890ab1SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
6389f7d0b68SHong Zhang       v  = aa + ai[i];
6399f7d0b68SHong Zhang       nz = ai[i + 1] - ai[i];
6402205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
6411a890ab1SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
6426cc28720Svictorle     }
643118f5789SBarry Smith     sctx.shift_top *= 1.1;
644d2276718SHong Zhang     sctx.nshift_max = 5;
645d2276718SHong Zhang     sctx.shift_lo   = 0.;
646d2276718SHong Zhang     sctx.shift_hi   = 1.;
647a0a356efSHong Zhang   }
648a0a356efSHong Zhang 
6499566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
6509566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
6519566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &rtmp));
652504a3fadSHong Zhang   ics = ic;
653504a3fadSHong Zhang 
654085256b3SBarry Smith   do {
65507b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
656289bc588SBarry Smith     for (i = 0; i < n; i++) {
657b3bf805bSHong Zhang       nz    = bi[i + 1] - bi[i];
658b3bf805bSHong Zhang       bjtmp = bj + bi[i];
6599f7d0b68SHong Zhang       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
660289bc588SBarry Smith 
661289bc588SBarry Smith       /* load in initial (unfactored row) */
6629f7d0b68SHong Zhang       nz    = ai[r[i] + 1] - ai[r[i]];
6639f7d0b68SHong Zhang       ajtmp = aj + ai[r[i]];
6649f7d0b68SHong Zhang       v     = aa + ai[r[i]];
665ad540459SPierre Jolivet       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
666d2276718SHong Zhang       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
667289bc588SBarry Smith 
668b3bf805bSHong Zhang       row = *bjtmp++;
669f2582111SSatish Balay       while (row < i) {
6708c37ef55SBarry Smith         pc = rtmp + row;
6718c37ef55SBarry Smith         if (*pc != 0.0) {
672010a20caSHong Zhang           pv         = b->a + diag_offset[row];
673010a20caSHong Zhang           pj         = b->j + diag_offset[row] + 1;
67435aab85fSBarry Smith           multiplier = *pc / *pv++;
6758c37ef55SBarry Smith           *pc        = multiplier;
676b3bf805bSHong Zhang           nz         = bi[row + 1] - diag_offset[row] - 1;
6779f7d0b68SHong Zhang           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
6789566063dSJacob Faibussowitsch           PetscCall(PetscLogFlops(1 + 2.0 * nz));
679289bc588SBarry Smith         }
680b3bf805bSHong Zhang         row = *bjtmp++;
681289bc588SBarry Smith       }
682416022c9SBarry Smith       /* finished row so stick it into b->a */
683b3bf805bSHong Zhang       pv   = b->a + bi[i];
684b3bf805bSHong Zhang       pj   = b->j + bi[i];
685b3bf805bSHong Zhang       nz   = bi[i + 1] - bi[i];
686b3bf805bSHong Zhang       diag = diag_offset[i] - bi[i];
687e57ff13aSBarry Smith       rs   = 0.0;
688d3d32019SBarry Smith       for (j = 0; j < nz; j++) {
6899f7d0b68SHong Zhang         pv[j] = rtmp[pj[j]];
6909f7d0b68SHong Zhang         rs += PetscAbsScalar(pv[j]);
691d3d32019SBarry Smith       }
692e57ff13aSBarry Smith       rs -= PetscAbsScalar(pv[diag]);
693d2276718SHong Zhang 
694d2276718SHong Zhang       sctx.rs = rs;
695d2276718SHong Zhang       sctx.pv = pv[diag];
6969566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
69707b50cabSHong Zhang       if (sctx.newshift) break;
698504a3fadSHong Zhang       pv[diag] = sctx.pv;
69935aab85fSBarry Smith     }
700d2276718SHong Zhang 
70107b50cabSHong Zhang     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
7026cc28720Svictorle       /*
7036c037d1bSvictorle        * if no shift in this attempt & shifting & started shifting & can refine,
7046cc28720Svictorle        * then try lower shift
7056cc28720Svictorle        */
7060481f469SBarry Smith       sctx.shift_hi       = sctx.shift_fraction;
7070481f469SBarry Smith       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
7080481f469SBarry Smith       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
70907b50cabSHong Zhang       sctx.newshift       = PETSC_TRUE;
710d2276718SHong Zhang       sctx.nshift++;
7116cc28720Svictorle     }
71207b50cabSHong Zhang   } while (sctx.newshift);
7130f11f9aeSSatish Balay 
714a5b23f4aSJose E. Roman   /* invert diagonal entries for simpler triangular solves */
715ad540459SPierre Jolivet   for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]];
7169566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
7179566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
7189566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
719d3ac4fa3SBarry Smith 
7209566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
7219566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isicol, &col_identity));
7228d8c7c43SBarry Smith   if (row_identity && col_identity) {
723ad04f41aSHong Zhang     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
7248d8c7c43SBarry Smith   } else {
725ad04f41aSHong Zhang     C->ops->solve = MatSolve_SeqAIJ_inplace;
726db4efbfdSBarry Smith   }
727ad04f41aSHong Zhang   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
728ad04f41aSHong Zhang   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
729ad04f41aSHong Zhang   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
730ad04f41aSHong Zhang   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
7312205254eSKarl Rupp 
732c456f294SBarry Smith   C->assembled    = PETSC_TRUE;
7335c9eb25fSBarry Smith   C->preallocated = PETSC_TRUE;
7342205254eSKarl Rupp 
7359566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->cmap->n));
736d2276718SHong Zhang   if (sctx.nshift) {
737f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
7389566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
739f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
7409566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
7416cc28720Svictorle     }
7420697b6caSHong Zhang   }
743d3ac4fa3SBarry Smith   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
744d3ac4fa3SBarry Smith   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
7452205254eSKarl Rupp 
7469566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCheckInode(C));
7473a40ed3dSBarry Smith   PetscFunctionReturn(0);
748289bc588SBarry Smith }
7490889b5dcSvictorle 
750137fb511SHong Zhang /*
751137fb511SHong Zhang    This routine implements inplace ILU(0) with row or/and column permutations.
752137fb511SHong Zhang    Input:
753137fb511SHong Zhang      A - original matrix
754137fb511SHong Zhang    Output;
755137fb511SHong Zhang      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
756137fb511SHong Zhang          a->j (col index) is permuted by the inverse of colperm, then sorted
757137fb511SHong Zhang          a->a reordered accordingly with a->j
758137fb511SHong Zhang          a->diag (ptr to diagonal elements) is updated.
759137fb511SHong Zhang */
7609371c9d4SSatish Balay PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info) {
761b051339dSHong Zhang   Mat_SeqAIJ      *a     = (Mat_SeqAIJ *)A->data;
762b051339dSHong Zhang   IS               isrow = a->row, isicol = a->icol;
7635d0c19d7SBarry Smith   const PetscInt  *r, *ic, *ics;
7645d0c19d7SBarry Smith   PetscInt         i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
7655d0c19d7SBarry Smith   PetscInt        *ajtmp, nz, row;
766b051339dSHong Zhang   PetscInt        *diag = a->diag, nbdiag, *pj;
767a77337e4SBarry Smith   PetscScalar     *rtmp, *pc, multiplier, d;
768504a3fadSHong Zhang   MatScalar       *pv, *v;
769137fb511SHong Zhang   PetscReal        rs;
770d90ac83dSHong Zhang   FactorShiftCtx   sctx;
771504a3fadSHong Zhang   const MatScalar *aa = a->a, *vtmp;
772137fb511SHong Zhang 
773137fb511SHong Zhang   PetscFunctionBegin;
77408401ef6SPierre Jolivet   PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
775504a3fadSHong Zhang 
776504a3fadSHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
7779566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
778504a3fadSHong Zhang 
779504a3fadSHong Zhang   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
780504a3fadSHong Zhang     const PetscInt *ddiag = a->diag;
781504a3fadSHong Zhang     sctx.shift_top        = info->zeropivot;
782504a3fadSHong Zhang     for (i = 0; i < n; i++) {
783504a3fadSHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
784504a3fadSHong Zhang       d    = (aa)[ddiag[i]];
785504a3fadSHong Zhang       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
786504a3fadSHong Zhang       vtmp = aa + ai[i];
787504a3fadSHong Zhang       nz   = ai[i + 1] - ai[i];
7882205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
789504a3fadSHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
790504a3fadSHong Zhang     }
791504a3fadSHong Zhang     sctx.shift_top *= 1.1;
792504a3fadSHong Zhang     sctx.nshift_max = 5;
793504a3fadSHong Zhang     sctx.shift_lo   = 0.;
794504a3fadSHong Zhang     sctx.shift_hi   = 1.;
795504a3fadSHong Zhang   }
796504a3fadSHong Zhang 
7979566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
7989566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
7999566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &rtmp));
8009566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(rtmp, n + 1));
801b051339dSHong Zhang   ics = ic;
802137fb511SHong Zhang 
803504a3fadSHong Zhang #if defined(MV)
80475567043SBarry Smith   sctx.shift_top      = 0.;
805e60cf9a0SBarry Smith   sctx.nshift_max     = 0;
80675567043SBarry Smith   sctx.shift_lo       = 0.;
80775567043SBarry Smith   sctx.shift_hi       = 0.;
80875567043SBarry Smith   sctx.shift_fraction = 0.;
809137fb511SHong Zhang 
810f4db908eSBarry Smith   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
81175567043SBarry Smith     sctx.shift_top = 0.;
812137fb511SHong Zhang     for (i = 0; i < n; i++) {
813137fb511SHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
814b051339dSHong Zhang       d  = (a->a)[diag[i]];
815137fb511SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
816b051339dSHong Zhang       v  = a->a + ai[i];
817b051339dSHong Zhang       nz = ai[i + 1] - ai[i];
8182205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
819137fb511SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
820137fb511SHong Zhang     }
821137fb511SHong Zhang     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
822137fb511SHong Zhang     sctx.shift_top *= 1.1;
823137fb511SHong Zhang     sctx.nshift_max = 5;
824137fb511SHong Zhang     sctx.shift_lo   = 0.;
825137fb511SHong Zhang     sctx.shift_hi   = 1.;
826137fb511SHong Zhang   }
827137fb511SHong Zhang 
82875567043SBarry Smith   sctx.shift_amount = 0.;
829137fb511SHong Zhang   sctx.nshift       = 0;
830504a3fadSHong Zhang #endif
831504a3fadSHong Zhang 
832137fb511SHong Zhang   do {
83307b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
834137fb511SHong Zhang     for (i = 0; i < n; i++) {
835b051339dSHong Zhang       /* load in initial unfactored row */
836b051339dSHong Zhang       nz    = ai[r[i] + 1] - ai[r[i]];
837b051339dSHong Zhang       ajtmp = aj + ai[r[i]];
838b051339dSHong Zhang       v     = a->a + ai[r[i]];
839137fb511SHong Zhang       /* sort permuted ajtmp and values v accordingly */
840b051339dSHong Zhang       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
8419566063dSJacob Faibussowitsch       PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
842137fb511SHong Zhang 
843b051339dSHong Zhang       diag[r[i]] = ai[r[i]];
844137fb511SHong Zhang       for (j = 0; j < nz; j++) {
845137fb511SHong Zhang         rtmp[ajtmp[j]] = v[j];
846b051339dSHong Zhang         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
847137fb511SHong Zhang       }
848137fb511SHong Zhang       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
849137fb511SHong Zhang 
850b051339dSHong Zhang       row = *ajtmp++;
851137fb511SHong Zhang       while (row < i) {
852137fb511SHong Zhang         pc = rtmp + row;
853137fb511SHong Zhang         if (*pc != 0.0) {
854b051339dSHong Zhang           pv = a->a + diag[r[row]];
855b051339dSHong Zhang           pj = aj + diag[r[row]] + 1;
856137fb511SHong Zhang 
857137fb511SHong Zhang           multiplier = *pc / *pv++;
858137fb511SHong Zhang           *pc        = multiplier;
859b051339dSHong Zhang           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
860b051339dSHong Zhang           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
8619566063dSJacob Faibussowitsch           PetscCall(PetscLogFlops(1 + 2.0 * nz));
862137fb511SHong Zhang         }
863b051339dSHong Zhang         row = *ajtmp++;
864137fb511SHong Zhang       }
865b051339dSHong Zhang       /* finished row so overwrite it onto a->a */
866b051339dSHong Zhang       pv     = a->a + ai[r[i]];
867b051339dSHong Zhang       pj     = aj + ai[r[i]];
868b051339dSHong Zhang       nz     = ai[r[i] + 1] - ai[r[i]];
869b051339dSHong Zhang       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
870137fb511SHong Zhang 
871137fb511SHong Zhang       rs = 0.0;
872137fb511SHong Zhang       for (j = 0; j < nz; j++) {
873b051339dSHong Zhang         pv[j] = rtmp[pj[j]];
874b051339dSHong Zhang         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
875137fb511SHong Zhang       }
876137fb511SHong Zhang 
877137fb511SHong Zhang       sctx.rs = rs;
878b051339dSHong Zhang       sctx.pv = pv[nbdiag];
8799566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
88007b50cabSHong Zhang       if (sctx.newshift) break;
881504a3fadSHong Zhang       pv[nbdiag] = sctx.pv;
882137fb511SHong Zhang     }
883137fb511SHong Zhang 
88407b50cabSHong Zhang     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
885137fb511SHong Zhang       /*
886137fb511SHong Zhang        * if no shift in this attempt & shifting & started shifting & can refine,
887137fb511SHong Zhang        * then try lower shift
888137fb511SHong Zhang        */
8890481f469SBarry Smith       sctx.shift_hi       = sctx.shift_fraction;
8900481f469SBarry Smith       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
8910481f469SBarry Smith       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
89207b50cabSHong Zhang       sctx.newshift       = PETSC_TRUE;
893137fb511SHong Zhang       sctx.nshift++;
894137fb511SHong Zhang     }
89507b50cabSHong Zhang   } while (sctx.newshift);
896137fb511SHong Zhang 
897a5b23f4aSJose E. Roman   /* invert diagonal entries for simpler triangular solves */
898ad540459SPierre Jolivet   for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]];
899137fb511SHong Zhang 
9009566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
9019566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
9029566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
9032205254eSKarl Rupp 
904b051339dSHong Zhang   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
905ad04f41aSHong Zhang   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
906ad04f41aSHong Zhang   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
907ad04f41aSHong Zhang   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
9082205254eSKarl Rupp 
909b051339dSHong Zhang   A->assembled    = PETSC_TRUE;
9105c9eb25fSBarry Smith   A->preallocated = PETSC_TRUE;
9112205254eSKarl Rupp 
9129566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(A->cmap->n));
913137fb511SHong Zhang   if (sctx.nshift) {
914f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
9159566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
916f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
9179566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
918137fb511SHong Zhang     }
919137fb511SHong Zhang   }
920137fb511SHong Zhang   PetscFunctionReturn(0);
921137fb511SHong Zhang }
922137fb511SHong Zhang 
9230a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */
9249371c9d4SSatish Balay PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info) {
925416022c9SBarry Smith   Mat C;
926416022c9SBarry Smith 
9273a40ed3dSBarry Smith   PetscFunctionBegin;
9289566063dSJacob Faibussowitsch   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
9299566063dSJacob Faibussowitsch   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
9309566063dSJacob Faibussowitsch   PetscCall(MatLUFactorNumeric(C, A, info));
9312205254eSKarl Rupp 
932db4efbfdSBarry Smith   A->ops->solve          = C->ops->solve;
933db4efbfdSBarry Smith   A->ops->solvetranspose = C->ops->solvetranspose;
9342205254eSKarl Rupp 
9359566063dSJacob Faibussowitsch   PetscCall(MatHeaderMerge(A, &C));
9363a40ed3dSBarry Smith   PetscFunctionReturn(0);
937da3a660dSBarry Smith }
9380a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */
9391d098868SHong Zhang 
9409371c9d4SSatish Balay PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) {
941416022c9SBarry Smith   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
942416022c9SBarry Smith   IS                 iscol = a->col, isrow = a->row;
9435d0c19d7SBarry Smith   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
9445d0c19d7SBarry Smith   PetscInt           nz;
9455d0c19d7SBarry Smith   const PetscInt    *rout, *cout, *r, *c;
946dd6ea824SBarry Smith   PetscScalar       *x, *tmp, *tmps, sum;
947d9fead3dSBarry Smith   const PetscScalar *b;
948dd6ea824SBarry Smith   const MatScalar   *aa = a->a, *v;
9498c37ef55SBarry Smith 
9503a40ed3dSBarry Smith   PetscFunctionBegin;
9513a40ed3dSBarry Smith   if (!n) PetscFunctionReturn(0);
95291bf9adeSBarry Smith 
9539566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
9549566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
955416022c9SBarry Smith   tmp = a->solve_work;
9568c37ef55SBarry Smith 
9579371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
9589371c9d4SSatish Balay   r = rout;
9599371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
9609371c9d4SSatish Balay   c = cout + (n - 1);
9618c37ef55SBarry Smith 
9628c37ef55SBarry Smith   /* forward solve the lower triangular */
9638c37ef55SBarry Smith   tmp[0] = b[*r++];
964010a20caSHong Zhang   tmps   = tmp;
9658c37ef55SBarry Smith   for (i = 1; i < n; i++) {
966010a20caSHong Zhang     v   = aa + ai[i];
967010a20caSHong Zhang     vi  = aj + ai[i];
96853e7425aSBarry Smith     nz  = a->diag[i] - ai[i];
96953e7425aSBarry Smith     sum = b[*r++];
970003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
9718c37ef55SBarry Smith     tmp[i] = sum;
9728c37ef55SBarry Smith   }
9738c37ef55SBarry Smith 
9748c37ef55SBarry Smith   /* backward solve the upper triangular */
9758c37ef55SBarry Smith   for (i = n - 1; i >= 0; i--) {
976010a20caSHong Zhang     v   = aa + a->diag[i] + 1;
977010a20caSHong Zhang     vi  = aj + a->diag[i] + 1;
978416022c9SBarry Smith     nz  = ai[i + 1] - a->diag[i] - 1;
9798c37ef55SBarry Smith     sum = tmp[i];
980003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
981010a20caSHong Zhang     x[*c--] = tmp[i] = sum * aa[a->diag[i]];
9828c37ef55SBarry Smith   }
9838c37ef55SBarry Smith 
9849566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
9859566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
9869566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
9879566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
9889566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
9893a40ed3dSBarry Smith   PetscFunctionReturn(0);
9908c37ef55SBarry Smith }
991026e39d0SSatish Balay 
9929371c9d4SSatish Balay PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X) {
9932bebee5dSHong Zhang   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
9942bebee5dSHong Zhang   IS                 iscol = a->col, isrow = a->row;
9955d0c19d7SBarry Smith   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
996910cf402Sprj-   PetscInt           nz, neq, ldb, ldx;
9975d0c19d7SBarry Smith   const PetscInt    *rout, *cout, *r, *c;
998910cf402Sprj-   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
999910cf402Sprj-   const PetscScalar *b, *aa  = a->a, *v;
1000910cf402Sprj-   PetscBool          isdense;
10012bebee5dSHong Zhang 
10022bebee5dSHong Zhang   PetscFunctionBegin;
10032bebee5dSHong Zhang   if (!n) PetscFunctionReturn(0);
10049566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
100528b400f6SJacob Faibussowitsch   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1006f9fb9879SHong Zhang   if (X != B) {
10079566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
100828b400f6SJacob Faibussowitsch     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1009f9fb9879SHong Zhang   }
10109566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(B, &b));
10119566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(B, &ldb));
10129566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(X, &x));
10139566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(X, &ldx));
10149371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
10159371c9d4SSatish Balay   r = rout;
10169371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
10179371c9d4SSatish Balay   c = cout;
1018a36b22ccSSatish Balay   for (neq = 0; neq < B->cmap->n; neq++) {
10192bebee5dSHong Zhang     /* forward solve the lower triangular */
10202bebee5dSHong Zhang     tmp[0] = b[r[0]];
10212bebee5dSHong Zhang     tmps   = tmp;
10222bebee5dSHong Zhang     for (i = 1; i < n; i++) {
10232bebee5dSHong Zhang       v   = aa + ai[i];
10242bebee5dSHong Zhang       vi  = aj + ai[i];
10252bebee5dSHong Zhang       nz  = a->diag[i] - ai[i];
10262bebee5dSHong Zhang       sum = b[r[i]];
1027003131ecSBarry Smith       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
10282bebee5dSHong Zhang       tmp[i] = sum;
10292bebee5dSHong Zhang     }
10302bebee5dSHong Zhang     /* backward solve the upper triangular */
10312bebee5dSHong Zhang     for (i = n - 1; i >= 0; i--) {
10322bebee5dSHong Zhang       v   = aa + a->diag[i] + 1;
10332bebee5dSHong Zhang       vi  = aj + a->diag[i] + 1;
10342bebee5dSHong Zhang       nz  = ai[i + 1] - a->diag[i] - 1;
10352bebee5dSHong Zhang       sum = tmp[i];
1036003131ecSBarry Smith       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
10372bebee5dSHong Zhang       x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
10382bebee5dSHong Zhang     }
1039910cf402Sprj-     b += ldb;
1040910cf402Sprj-     x += ldx;
10412bebee5dSHong Zhang   }
10429566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
10439566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
10449566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(B, &b));
10459566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(X, &x));
10469566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
10472bebee5dSHong Zhang   PetscFunctionReturn(0);
10482bebee5dSHong Zhang }
10492bebee5dSHong Zhang 
10509371c9d4SSatish Balay PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X) {
10519bd0847aSShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
10529bd0847aSShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
10539bd0847aSShri Abhyankar   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1054910cf402Sprj-   PetscInt           nz, neq, ldb, ldx;
10559bd0847aSShri Abhyankar   const PetscInt    *rout, *cout, *r, *c;
1056910cf402Sprj-   PetscScalar       *x, *tmp = a->solve_work, sum;
1057910cf402Sprj-   const PetscScalar *b, *aa  = a->a, *v;
1058910cf402Sprj-   PetscBool          isdense;
10599bd0847aSShri Abhyankar 
10609bd0847aSShri Abhyankar   PetscFunctionBegin;
10619bd0847aSShri Abhyankar   if (!n) PetscFunctionReturn(0);
10629566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
106328b400f6SJacob Faibussowitsch   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1064f9fb9879SHong Zhang   if (X != B) {
10659566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
106628b400f6SJacob Faibussowitsch     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1067f9fb9879SHong Zhang   }
10689566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(B, &b));
10699566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(B, &ldb));
10709566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(X, &x));
10719566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(X, &ldx));
10729371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
10739371c9d4SSatish Balay   r = rout;
10749371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
10759371c9d4SSatish Balay   c = cout;
10769bd0847aSShri Abhyankar   for (neq = 0; neq < B->cmap->n; neq++) {
10779bd0847aSShri Abhyankar     /* forward solve the lower triangular */
10789bd0847aSShri Abhyankar     tmp[0] = b[r[0]];
10799bd0847aSShri Abhyankar     v      = aa;
10809bd0847aSShri Abhyankar     vi     = aj;
10819bd0847aSShri Abhyankar     for (i = 1; i < n; i++) {
10829bd0847aSShri Abhyankar       nz  = ai[i + 1] - ai[i];
10839bd0847aSShri Abhyankar       sum = b[r[i]];
10849bd0847aSShri Abhyankar       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
10859bd0847aSShri Abhyankar       tmp[i] = sum;
10869371c9d4SSatish Balay       v += nz;
10879371c9d4SSatish Balay       vi += nz;
10889bd0847aSShri Abhyankar     }
10899bd0847aSShri Abhyankar     /* backward solve the upper triangular */
10909bd0847aSShri Abhyankar     for (i = n - 1; i >= 0; i--) {
10919bd0847aSShri Abhyankar       v   = aa + adiag[i + 1] + 1;
10929bd0847aSShri Abhyankar       vi  = aj + adiag[i + 1] + 1;
10939bd0847aSShri Abhyankar       nz  = adiag[i] - adiag[i + 1] - 1;
10949bd0847aSShri Abhyankar       sum = tmp[i];
10959bd0847aSShri Abhyankar       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
10969bd0847aSShri Abhyankar       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
10979bd0847aSShri Abhyankar     }
1098910cf402Sprj-     b += ldb;
1099910cf402Sprj-     x += ldx;
11009bd0847aSShri Abhyankar   }
11019566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
11029566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
11039566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(B, &b));
11049566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(X, &x));
11059566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
11069bd0847aSShri Abhyankar   PetscFunctionReturn(0);
11079bd0847aSShri Abhyankar }
11089bd0847aSShri Abhyankar 
11099371c9d4SSatish Balay PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx) {
1110137fb511SHong Zhang   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1111137fb511SHong Zhang   IS                 iscol = a->col, isrow = a->row;
11125d0c19d7SBarry Smith   const PetscInt    *r, *c, *rout, *cout;
11135d0c19d7SBarry Smith   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
11145d0c19d7SBarry Smith   PetscInt           nz, row;
1115fdc842d1SBarry Smith   PetscScalar       *x, *tmp, *tmps, sum;
1116fdc842d1SBarry Smith   const PetscScalar *b;
1117dd6ea824SBarry Smith   const MatScalar   *aa = a->a, *v;
1118137fb511SHong Zhang 
1119137fb511SHong Zhang   PetscFunctionBegin;
1120137fb511SHong Zhang   if (!n) PetscFunctionReturn(0);
1121137fb511SHong Zhang 
11229566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
11239566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
1124137fb511SHong Zhang   tmp = a->solve_work;
1125137fb511SHong Zhang 
11269371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
11279371c9d4SSatish Balay   r = rout;
11289371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
11299371c9d4SSatish Balay   c = cout + (n - 1);
1130137fb511SHong Zhang 
1131137fb511SHong Zhang   /* forward solve the lower triangular */
1132137fb511SHong Zhang   tmp[0] = b[*r++];
1133137fb511SHong Zhang   tmps   = tmp;
1134137fb511SHong Zhang   for (row = 1; row < n; row++) {
1135137fb511SHong Zhang     i   = rout[row]; /* permuted row */
1136137fb511SHong Zhang     v   = aa + ai[i];
1137137fb511SHong Zhang     vi  = aj + ai[i];
1138137fb511SHong Zhang     nz  = a->diag[i] - ai[i];
1139137fb511SHong Zhang     sum = b[*r++];
1140003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1141137fb511SHong Zhang     tmp[row] = sum;
1142137fb511SHong Zhang   }
1143137fb511SHong Zhang 
1144137fb511SHong Zhang   /* backward solve the upper triangular */
1145137fb511SHong Zhang   for (row = n - 1; row >= 0; row--) {
1146137fb511SHong Zhang     i   = rout[row]; /* permuted row */
1147137fb511SHong Zhang     v   = aa + a->diag[i] + 1;
1148137fb511SHong Zhang     vi  = aj + a->diag[i] + 1;
1149137fb511SHong Zhang     nz  = ai[i + 1] - a->diag[i] - 1;
1150137fb511SHong Zhang     sum = tmp[row];
1151003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1152137fb511SHong Zhang     x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1153137fb511SHong Zhang   }
1154137fb511SHong Zhang 
11559566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
11569566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
11579566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
11589566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
11599566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1160137fb511SHong Zhang   PetscFunctionReturn(0);
1161137fb511SHong Zhang }
1162137fb511SHong Zhang 
1163930ae53cSBarry Smith /* ----------------------------------------------------------- */
1164c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
11659371c9d4SSatish Balay PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) {
1166930ae53cSBarry Smith   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1167d0f46423SBarry Smith   PetscInt           n  = A->rmap->n;
1168b377110cSBarry Smith   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag;
1169356650c2SBarry Smith   PetscScalar       *x;
1170356650c2SBarry Smith   const PetscScalar *b;
1171dd6ea824SBarry Smith   const MatScalar   *aa = a->a;
1172aa482453SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1173356650c2SBarry Smith   PetscInt         adiag_i, i, nz, ai_i;
1174b377110cSBarry Smith   const PetscInt  *vi;
1175dd6ea824SBarry Smith   const MatScalar *v;
1176dd6ea824SBarry Smith   PetscScalar      sum;
1177d85afc42SSatish Balay #endif
1178930ae53cSBarry Smith 
11793a40ed3dSBarry Smith   PetscFunctionBegin;
11803a40ed3dSBarry Smith   if (!n) PetscFunctionReturn(0);
1181930ae53cSBarry Smith 
11829566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
11839566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
1184930ae53cSBarry Smith 
1185aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
11861c4feecaSSatish Balay   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
11871c4feecaSSatish Balay #else
1188930ae53cSBarry Smith   /* forward solve the lower triangular */
1189930ae53cSBarry Smith   x[0] = b[0];
1190930ae53cSBarry Smith   for (i = 1; i < n; i++) {
1191930ae53cSBarry Smith     ai_i = ai[i];
1192930ae53cSBarry Smith     v    = aa + ai_i;
1193930ae53cSBarry Smith     vi   = aj + ai_i;
1194930ae53cSBarry Smith     nz   = adiag[i] - ai_i;
1195930ae53cSBarry Smith     sum  = b[i];
1196003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1197930ae53cSBarry Smith     x[i] = sum;
1198930ae53cSBarry Smith   }
1199930ae53cSBarry Smith 
1200930ae53cSBarry Smith   /* backward solve the upper triangular */
1201930ae53cSBarry Smith   for (i = n - 1; i >= 0; i--) {
1202930ae53cSBarry Smith     adiag_i = adiag[i];
1203d708749eSBarry Smith     v       = aa + adiag_i + 1;
1204d708749eSBarry Smith     vi      = aj + adiag_i + 1;
1205930ae53cSBarry Smith     nz      = ai[i + 1] - adiag_i - 1;
1206930ae53cSBarry Smith     sum     = x[i];
1207003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1208930ae53cSBarry Smith     x[i] = sum * aa[adiag_i];
1209930ae53cSBarry Smith   }
12101c4feecaSSatish Balay #endif
12119566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
12129566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
12139566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
12143a40ed3dSBarry Smith   PetscFunctionReturn(0);
1215930ae53cSBarry Smith }
1216930ae53cSBarry Smith 
12179371c9d4SSatish Balay PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx) {
1218416022c9SBarry Smith   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1219416022c9SBarry Smith   IS                 iscol = a->col, isrow = a->row;
122004fbf559SBarry Smith   PetscInt           i, n                  = A->rmap->n, j;
12215d0c19d7SBarry Smith   PetscInt           nz;
122204fbf559SBarry Smith   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
122304fbf559SBarry Smith   PetscScalar       *x, *tmp, sum;
122404fbf559SBarry Smith   const PetscScalar *b;
1225dd6ea824SBarry Smith   const MatScalar   *aa = a->a, *v;
1226da3a660dSBarry Smith 
12273a40ed3dSBarry Smith   PetscFunctionBegin;
12289566063dSJacob Faibussowitsch   if (yy != xx) PetscCall(VecCopy(yy, xx));
1229da3a660dSBarry Smith 
12309566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
12319566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
1232416022c9SBarry Smith   tmp = a->solve_work;
1233da3a660dSBarry Smith 
12349371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
12359371c9d4SSatish Balay   r = rout;
12369371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
12379371c9d4SSatish Balay   c = cout + (n - 1);
1238da3a660dSBarry Smith 
1239da3a660dSBarry Smith   /* forward solve the lower triangular */
1240da3a660dSBarry Smith   tmp[0] = b[*r++];
1241da3a660dSBarry Smith   for (i = 1; i < n; i++) {
1242010a20caSHong Zhang     v   = aa + ai[i];
1243010a20caSHong Zhang     vi  = aj + ai[i];
1244416022c9SBarry Smith     nz  = a->diag[i] - ai[i];
1245da3a660dSBarry Smith     sum = b[*r++];
124604fbf559SBarry Smith     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1247da3a660dSBarry Smith     tmp[i] = sum;
1248da3a660dSBarry Smith   }
1249da3a660dSBarry Smith 
1250da3a660dSBarry Smith   /* backward solve the upper triangular */
1251da3a660dSBarry Smith   for (i = n - 1; i >= 0; i--) {
1252010a20caSHong Zhang     v   = aa + a->diag[i] + 1;
1253010a20caSHong Zhang     vi  = aj + a->diag[i] + 1;
1254416022c9SBarry Smith     nz  = ai[i + 1] - a->diag[i] - 1;
1255da3a660dSBarry Smith     sum = tmp[i];
125604fbf559SBarry Smith     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1257010a20caSHong Zhang     tmp[i] = sum * aa[a->diag[i]];
1258da3a660dSBarry Smith     x[*c--] += tmp[i];
1259da3a660dSBarry Smith   }
1260da3a660dSBarry Smith 
12619566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
12629566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
12639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
12649566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
12659566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
12663a40ed3dSBarry Smith   PetscFunctionReturn(0);
1267da3a660dSBarry Smith }
126804fbf559SBarry Smith 
12699371c9d4SSatish Balay PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx) {
12703c0119dfSShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
12713c0119dfSShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
12723c0119dfSShri Abhyankar   PetscInt           i, n                  = A->rmap->n, j;
12733c0119dfSShri Abhyankar   PetscInt           nz;
12743c0119dfSShri Abhyankar   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
12753c0119dfSShri Abhyankar   PetscScalar       *x, *tmp, sum;
12763c0119dfSShri Abhyankar   const PetscScalar *b;
12773c0119dfSShri Abhyankar   const MatScalar   *aa = a->a, *v;
12783c0119dfSShri Abhyankar 
12793c0119dfSShri Abhyankar   PetscFunctionBegin;
12809566063dSJacob Faibussowitsch   if (yy != xx) PetscCall(VecCopy(yy, xx));
12813c0119dfSShri Abhyankar 
12829566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
12839566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
12843c0119dfSShri Abhyankar   tmp = a->solve_work;
12853c0119dfSShri Abhyankar 
12869371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
12879371c9d4SSatish Balay   r = rout;
12889371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
12899371c9d4SSatish Balay   c = cout;
12903c0119dfSShri Abhyankar 
12913c0119dfSShri Abhyankar   /* forward solve the lower triangular */
12923c0119dfSShri Abhyankar   tmp[0] = b[r[0]];
12933c0119dfSShri Abhyankar   v      = aa;
12943c0119dfSShri Abhyankar   vi     = aj;
12953c0119dfSShri Abhyankar   for (i = 1; i < n; i++) {
12963c0119dfSShri Abhyankar     nz  = ai[i + 1] - ai[i];
12973c0119dfSShri Abhyankar     sum = b[r[i]];
12983c0119dfSShri Abhyankar     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
12993c0119dfSShri Abhyankar     tmp[i] = sum;
13002205254eSKarl Rupp     v += nz;
13012205254eSKarl Rupp     vi += nz;
13023c0119dfSShri Abhyankar   }
13033c0119dfSShri Abhyankar 
13043c0119dfSShri Abhyankar   /* backward solve the upper triangular */
13053c0119dfSShri Abhyankar   v  = aa + adiag[n - 1];
13063c0119dfSShri Abhyankar   vi = aj + adiag[n - 1];
13073c0119dfSShri Abhyankar   for (i = n - 1; i >= 0; i--) {
13083c0119dfSShri Abhyankar     nz  = adiag[i] - adiag[i + 1] - 1;
13093c0119dfSShri Abhyankar     sum = tmp[i];
13103c0119dfSShri Abhyankar     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
13113c0119dfSShri Abhyankar     tmp[i] = sum * v[nz];
13123c0119dfSShri Abhyankar     x[c[i]] += tmp[i];
13139371c9d4SSatish Balay     v += nz + 1;
13149371c9d4SSatish Balay     vi += nz + 1;
13153c0119dfSShri Abhyankar   }
13163c0119dfSShri Abhyankar 
13179566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
13189566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
13199566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
13209566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
13219566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
13223c0119dfSShri Abhyankar   PetscFunctionReturn(0);
13233c0119dfSShri Abhyankar }
13243c0119dfSShri Abhyankar 
13259371c9d4SSatish Balay PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) {
1326416022c9SBarry Smith   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
13272235e667SBarry Smith   IS                 iscol = a->col, isrow = a->row;
132804fbf559SBarry Smith   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
132904fbf559SBarry Smith   PetscInt           i, n = A->rmap->n, j;
133004fbf559SBarry Smith   PetscInt           nz;
133104fbf559SBarry Smith   PetscScalar       *x, *tmp, s1;
1332dd6ea824SBarry Smith   const MatScalar   *aa = a->a, *v;
133304fbf559SBarry Smith   const PetscScalar *b;
1334da3a660dSBarry Smith 
13353a40ed3dSBarry Smith   PetscFunctionBegin;
13369566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
13379566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
1338416022c9SBarry Smith   tmp = a->solve_work;
1339da3a660dSBarry Smith 
13409371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
13419371c9d4SSatish Balay   r = rout;
13429371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
13439371c9d4SSatish Balay   c = cout;
1344da3a660dSBarry Smith 
1345da3a660dSBarry Smith   /* copy the b into temp work space according to permutation */
13462235e667SBarry Smith   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1347da3a660dSBarry Smith 
1348da3a660dSBarry Smith   /* forward solve the U^T */
1349da3a660dSBarry Smith   for (i = 0; i < n; i++) {
1350010a20caSHong Zhang     v  = aa + diag[i];
1351010a20caSHong Zhang     vi = aj + diag[i] + 1;
1352f1af5d2fSBarry Smith     nz = ai[i + 1] - diag[i] - 1;
1353c38d4ed2SBarry Smith     s1 = tmp[i];
1354ef66eb69SBarry Smith     s1 *= (*v++); /* multiply by inverse of diagonal entry */
135504fbf559SBarry Smith     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1356c38d4ed2SBarry Smith     tmp[i] = s1;
1357da3a660dSBarry Smith   }
1358da3a660dSBarry Smith 
1359da3a660dSBarry Smith   /* backward solve the L^T */
1360da3a660dSBarry Smith   for (i = n - 1; i >= 0; i--) {
1361010a20caSHong Zhang     v  = aa + diag[i] - 1;
1362010a20caSHong Zhang     vi = aj + diag[i] - 1;
1363f1af5d2fSBarry Smith     nz = diag[i] - ai[i];
1364f1af5d2fSBarry Smith     s1 = tmp[i];
136504fbf559SBarry Smith     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1366da3a660dSBarry Smith   }
1367da3a660dSBarry Smith 
1368da3a660dSBarry Smith   /* copy tmp into x according to permutation */
1369591294e4SBarry Smith   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1370da3a660dSBarry Smith 
13719566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
13729566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
13739566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
13749566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
1375da3a660dSBarry Smith 
13769566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
13773a40ed3dSBarry Smith   PetscFunctionReturn(0);
1378da3a660dSBarry Smith }
1379da3a660dSBarry Smith 
13809371c9d4SSatish Balay PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx) {
1381d1fa9404SShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1382d1fa9404SShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
1383d1fa9404SShri Abhyankar   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1384d1fa9404SShri Abhyankar   PetscInt           i, n = A->rmap->n, j;
1385d1fa9404SShri Abhyankar   PetscInt           nz;
1386d1fa9404SShri Abhyankar   PetscScalar       *x, *tmp, s1;
1387d1fa9404SShri Abhyankar   const MatScalar   *aa = a->a, *v;
1388d1fa9404SShri Abhyankar   const PetscScalar *b;
1389d1fa9404SShri Abhyankar 
1390d1fa9404SShri Abhyankar   PetscFunctionBegin;
13919566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
13929566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
1393d1fa9404SShri Abhyankar   tmp = a->solve_work;
1394d1fa9404SShri Abhyankar 
13959371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
13969371c9d4SSatish Balay   r = rout;
13979371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
13989371c9d4SSatish Balay   c = cout;
1399d1fa9404SShri Abhyankar 
1400d1fa9404SShri Abhyankar   /* copy the b into temp work space according to permutation */
1401d1fa9404SShri Abhyankar   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1402d1fa9404SShri Abhyankar 
1403d1fa9404SShri Abhyankar   /* forward solve the U^T */
1404d1fa9404SShri Abhyankar   for (i = 0; i < n; i++) {
1405d1fa9404SShri Abhyankar     v  = aa + adiag[i + 1] + 1;
1406d1fa9404SShri Abhyankar     vi = aj + adiag[i + 1] + 1;
1407d1fa9404SShri Abhyankar     nz = adiag[i] - adiag[i + 1] - 1;
1408d1fa9404SShri Abhyankar     s1 = tmp[i];
1409d1fa9404SShri Abhyankar     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1410d1fa9404SShri Abhyankar     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1411d1fa9404SShri Abhyankar     tmp[i] = s1;
1412d1fa9404SShri Abhyankar   }
1413d1fa9404SShri Abhyankar 
1414d1fa9404SShri Abhyankar   /* backward solve the L^T */
1415d1fa9404SShri Abhyankar   for (i = n - 1; i >= 0; i--) {
1416d1fa9404SShri Abhyankar     v  = aa + ai[i];
1417d1fa9404SShri Abhyankar     vi = aj + ai[i];
1418d1fa9404SShri Abhyankar     nz = ai[i + 1] - ai[i];
1419d1fa9404SShri Abhyankar     s1 = tmp[i];
1420d1fa9404SShri Abhyankar     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1421d1fa9404SShri Abhyankar   }
1422d1fa9404SShri Abhyankar 
1423d1fa9404SShri Abhyankar   /* copy tmp into x according to permutation */
1424d1fa9404SShri Abhyankar   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1425d1fa9404SShri Abhyankar 
14269566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
14279566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
14289566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
14299566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
1430d1fa9404SShri Abhyankar 
14319566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1432d1fa9404SShri Abhyankar   PetscFunctionReturn(0);
1433d1fa9404SShri Abhyankar }
1434d1fa9404SShri Abhyankar 
14359371c9d4SSatish Balay PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx) {
1436416022c9SBarry Smith   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
14372235e667SBarry Smith   IS                 iscol = a->col, isrow = a->row;
143804fbf559SBarry Smith   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
143904fbf559SBarry Smith   PetscInt           i, n = A->rmap->n, j;
144004fbf559SBarry Smith   PetscInt           nz;
144104fbf559SBarry Smith   PetscScalar       *x, *tmp, s1;
1442dd6ea824SBarry Smith   const MatScalar   *aa = a->a, *v;
144304fbf559SBarry Smith   const PetscScalar *b;
14446abc6512SBarry Smith 
14453a40ed3dSBarry Smith   PetscFunctionBegin;
14469566063dSJacob Faibussowitsch   if (zz != xx) PetscCall(VecCopy(zz, xx));
14479566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
14489566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
1449416022c9SBarry Smith   tmp = a->solve_work;
14506abc6512SBarry Smith 
14519371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
14529371c9d4SSatish Balay   r = rout;
14539371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
14549371c9d4SSatish Balay   c = cout;
14556abc6512SBarry Smith 
14566abc6512SBarry Smith   /* copy the b into temp work space according to permutation */
14572235e667SBarry Smith   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
14586abc6512SBarry Smith 
14596abc6512SBarry Smith   /* forward solve the U^T */
14606abc6512SBarry Smith   for (i = 0; i < n; i++) {
1461010a20caSHong Zhang     v  = aa + diag[i];
1462010a20caSHong Zhang     vi = aj + diag[i] + 1;
1463f1af5d2fSBarry Smith     nz = ai[i + 1] - diag[i] - 1;
146404fbf559SBarry Smith     s1 = tmp[i];
146504fbf559SBarry Smith     s1 *= (*v++); /* multiply by inverse of diagonal entry */
146604fbf559SBarry Smith     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
146704fbf559SBarry Smith     tmp[i] = s1;
14686abc6512SBarry Smith   }
14696abc6512SBarry Smith 
14706abc6512SBarry Smith   /* backward solve the L^T */
14716abc6512SBarry Smith   for (i = n - 1; i >= 0; i--) {
1472010a20caSHong Zhang     v  = aa + diag[i] - 1;
1473010a20caSHong Zhang     vi = aj + diag[i] - 1;
1474f1af5d2fSBarry Smith     nz = diag[i] - ai[i];
147504fbf559SBarry Smith     s1 = tmp[i];
147604fbf559SBarry Smith     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
14776abc6512SBarry Smith   }
14786abc6512SBarry Smith 
14796abc6512SBarry Smith   /* copy tmp into x according to permutation */
14806abc6512SBarry Smith   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
14816abc6512SBarry Smith 
14829566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
14839566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
14849566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
14859566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
14866abc6512SBarry Smith 
14879566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
14883a40ed3dSBarry Smith   PetscFunctionReturn(0);
1489da3a660dSBarry Smith }
149004fbf559SBarry Smith 
14919371c9d4SSatish Balay PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx) {
1492533e6011SShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1493533e6011SShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
1494533e6011SShri Abhyankar   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1495533e6011SShri Abhyankar   PetscInt           i, n = A->rmap->n, j;
1496533e6011SShri Abhyankar   PetscInt           nz;
1497533e6011SShri Abhyankar   PetscScalar       *x, *tmp, s1;
1498533e6011SShri Abhyankar   const MatScalar   *aa = a->a, *v;
1499533e6011SShri Abhyankar   const PetscScalar *b;
1500533e6011SShri Abhyankar 
1501533e6011SShri Abhyankar   PetscFunctionBegin;
15029566063dSJacob Faibussowitsch   if (zz != xx) PetscCall(VecCopy(zz, xx));
15039566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
15049566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
1505533e6011SShri Abhyankar   tmp = a->solve_work;
1506533e6011SShri Abhyankar 
15079371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
15089371c9d4SSatish Balay   r = rout;
15099371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
15109371c9d4SSatish Balay   c = cout;
1511533e6011SShri Abhyankar 
1512533e6011SShri Abhyankar   /* copy the b into temp work space according to permutation */
1513533e6011SShri Abhyankar   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1514533e6011SShri Abhyankar 
1515533e6011SShri Abhyankar   /* forward solve the U^T */
1516533e6011SShri Abhyankar   for (i = 0; i < n; i++) {
1517533e6011SShri Abhyankar     v  = aa + adiag[i + 1] + 1;
1518533e6011SShri Abhyankar     vi = aj + adiag[i + 1] + 1;
1519533e6011SShri Abhyankar     nz = adiag[i] - adiag[i + 1] - 1;
1520533e6011SShri Abhyankar     s1 = tmp[i];
1521533e6011SShri Abhyankar     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1522533e6011SShri Abhyankar     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1523533e6011SShri Abhyankar     tmp[i] = s1;
1524533e6011SShri Abhyankar   }
1525533e6011SShri Abhyankar 
1526533e6011SShri Abhyankar   /* backward solve the L^T */
1527533e6011SShri Abhyankar   for (i = n - 1; i >= 0; i--) {
1528533e6011SShri Abhyankar     v  = aa + ai[i];
1529533e6011SShri Abhyankar     vi = aj + ai[i];
1530533e6011SShri Abhyankar     nz = ai[i + 1] - ai[i];
1531533e6011SShri Abhyankar     s1 = tmp[i];
1532c5e3b2a3SShri Abhyankar     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1533533e6011SShri Abhyankar   }
1534533e6011SShri Abhyankar 
1535533e6011SShri Abhyankar   /* copy tmp into x according to permutation */
1536533e6011SShri Abhyankar   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1537533e6011SShri Abhyankar 
15389566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
15399566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
15409566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
15419566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
1542533e6011SShri Abhyankar 
15439566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1544533e6011SShri Abhyankar   PetscFunctionReturn(0);
1545533e6011SShri Abhyankar }
1546533e6011SShri Abhyankar 
15479e25ed09SBarry Smith /* ----------------------------------------------------------------*/
1548b47f5a65SHong Zhang 
15498fc3a347SHong Zhang /*
15508a76ed4fSHong Zhang    ilu() under revised new data structure.
1551813a1f2bSShri Abhyankar    Factored arrays bj and ba are stored as
1552813a1f2bSShri Abhyankar      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1553813a1f2bSShri Abhyankar 
1554813a1f2bSShri Abhyankar    bi=fact->i is an array of size n+1, in which
1555baabb860SHong Zhang      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1556baabb860SHong Zhang      bi[n]:  points to L(n-1,n-1)+1
1557baabb860SHong Zhang 
1558813a1f2bSShri Abhyankar   bdiag=fact->diag is an array of size n+1,in which
1559baabb860SHong Zhang      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1560a24f213cSHong Zhang      bdiag[n]: points to entry of U(n-1,0)-1
1561813a1f2bSShri Abhyankar 
1562813a1f2bSShri Abhyankar    U(i,:) contains bdiag[i] as its last entry, i.e.,
1563813a1f2bSShri Abhyankar     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1564813a1f2bSShri Abhyankar */
15659371c9d4SSatish Balay PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) {
1566813a1f2bSShri Abhyankar   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1567bbd65245SShri Abhyankar   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1568bbd65245SShri Abhyankar   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
15691df811f5SHong Zhang   IS             isicol;
1570813a1f2bSShri Abhyankar 
1571813a1f2bSShri Abhyankar   PetscFunctionBegin;
15729566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
15739566063dSJacob Faibussowitsch   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1574813a1f2bSShri Abhyankar   b = (Mat_SeqAIJ *)(fact)->data;
1575813a1f2bSShri Abhyankar 
1576813a1f2bSShri Abhyankar   /* allocate matrix arrays for new data structure */
15779566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i));
15782205254eSKarl Rupp 
1579813a1f2bSShri Abhyankar   b->singlemalloc = PETSC_TRUE;
1580*4dfa11a4SJacob Faibussowitsch   if (!b->diag) { PetscCall(PetscMalloc1(n + 1, &b->diag)); }
1581813a1f2bSShri Abhyankar   bdiag = b->diag;
1582813a1f2bSShri Abhyankar 
158348a46eb9SPierre Jolivet   if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1584813a1f2bSShri Abhyankar 
1585813a1f2bSShri Abhyankar   /* set bi and bj with new data structure */
1586813a1f2bSShri Abhyankar   bi = b->i;
1587813a1f2bSShri Abhyankar   bj = b->j;
1588813a1f2bSShri Abhyankar 
1589813a1f2bSShri Abhyankar   /* L part */
1590e60cf9a0SBarry Smith   bi[0] = 0;
1591813a1f2bSShri Abhyankar   for (i = 0; i < n; i++) {
1592813a1f2bSShri Abhyankar     nz        = adiag[i] - ai[i];
1593813a1f2bSShri Abhyankar     bi[i + 1] = bi[i] + nz;
1594813a1f2bSShri Abhyankar     aj        = a->j + ai[i];
1595ad540459SPierre Jolivet     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1596813a1f2bSShri Abhyankar   }
1597813a1f2bSShri Abhyankar 
1598813a1f2bSShri Abhyankar   /* U part */
159962fbd6afSShri Abhyankar   bdiag[n] = bi[n] - 1;
1600813a1f2bSShri Abhyankar   for (i = n - 1; i >= 0; i--) {
1601813a1f2bSShri Abhyankar     nz = ai[i + 1] - adiag[i] - 1;
1602813a1f2bSShri Abhyankar     aj = a->j + adiag[i] + 1;
1603ad540459SPierre Jolivet     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1604813a1f2bSShri Abhyankar     /* diag[i] */
1605bbd65245SShri Abhyankar     bj[k++]  = i;
1606a24f213cSHong Zhang     bdiag[i] = bdiag[i + 1] + nz + 1;
1607813a1f2bSShri Abhyankar   }
16081df811f5SHong Zhang 
1609d5f3da31SBarry Smith   fact->factortype             = MAT_FACTOR_ILU;
16101df811f5SHong Zhang   fact->info.factor_mallocs    = 0;
16111df811f5SHong Zhang   fact->info.fill_ratio_given  = info->fill;
16121df811f5SHong Zhang   fact->info.fill_ratio_needed = 1.0;
161335233d99SShri Abhyankar   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
16149566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
16151df811f5SHong Zhang 
16161df811f5SHong Zhang   b       = (Mat_SeqAIJ *)(fact)->data;
16171df811f5SHong Zhang   b->row  = isrow;
16181df811f5SHong Zhang   b->col  = iscol;
16191df811f5SHong Zhang   b->icol = isicol;
16209566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work));
16219566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
16229566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
1623813a1f2bSShri Abhyankar   PetscFunctionReturn(0);
1624813a1f2bSShri Abhyankar }
1625813a1f2bSShri Abhyankar 
16269371c9d4SSatish Balay PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) {
1627813a1f2bSShri Abhyankar   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1628813a1f2bSShri Abhyankar   IS                 isicol;
1629813a1f2bSShri Abhyankar   const PetscInt    *r, *ic;
16301df811f5SHong Zhang   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1631813a1f2bSShri Abhyankar   PetscInt          *bi, *cols, nnz, *cols_lvl;
1632813a1f2bSShri Abhyankar   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1633813a1f2bSShri Abhyankar   PetscInt           i, levels, diagonal_fill;
1634035f4963SHong Zhang   PetscBool          col_identity, row_identity, missing;
1635813a1f2bSShri Abhyankar   PetscReal          f;
16360298fd71SBarry Smith   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1637813a1f2bSShri Abhyankar   PetscBT            lnkbt;
1638813a1f2bSShri Abhyankar   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
16390298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
16400298fd71SBarry Smith   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1641813a1f2bSShri Abhyankar 
1642813a1f2bSShri Abhyankar   PetscFunctionBegin;
164308401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
16449566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
164528b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1646ad04f41aSHong Zhang 
1647813a1f2bSShri Abhyankar   levels = (PetscInt)info->levels;
16489566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
16499566063dSJacob Faibussowitsch   PetscCall(ISIdentity(iscol, &col_identity));
1650813a1f2bSShri Abhyankar   if (!levels && row_identity && col_identity) {
1651813a1f2bSShri Abhyankar     /* special case: ilu(0) with natural ordering */
16529566063dSJacob Faibussowitsch     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1653ad540459SPierre Jolivet     if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1654813a1f2bSShri Abhyankar     PetscFunctionReturn(0);
1655813a1f2bSShri Abhyankar   }
1656813a1f2bSShri Abhyankar 
16579566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
16589566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
16599566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
1660813a1f2bSShri Abhyankar 
16611bfa06eaSJed Brown   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
16629566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bi));
16639566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));
1664e60cf9a0SBarry Smith   bi[0] = bdiag[0] = 0;
16659566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1666813a1f2bSShri Abhyankar 
1667813a1f2bSShri Abhyankar   /* create a linked list for storing column indices of the active row */
1668813a1f2bSShri Abhyankar   nlnk = n + 1;
16699566063dSJacob Faibussowitsch   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1670813a1f2bSShri Abhyankar 
1671813a1f2bSShri Abhyankar   /* initial FreeSpace size is f*(ai[n]+1) */
16721df811f5SHong Zhang   f             = info->fill;
16731df811f5SHong Zhang   diagonal_fill = (PetscInt)info->diagonal_fill;
16749566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1675813a1f2bSShri Abhyankar   current_space = free_space;
16769566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1677813a1f2bSShri Abhyankar   current_space_lvl = free_space_lvl;
1678813a1f2bSShri Abhyankar   for (i = 0; i < n; i++) {
1679813a1f2bSShri Abhyankar     nzi = 0;
1680813a1f2bSShri Abhyankar     /* copy current row into linked list */
1681813a1f2bSShri Abhyankar     nnz = ai[r[i] + 1] - ai[r[i]];
168228b400f6SJacob Faibussowitsch     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1683813a1f2bSShri Abhyankar     cols   = aj + ai[r[i]];
1684813a1f2bSShri Abhyankar     lnk[i] = -1; /* marker to indicate if diagonal exists */
16859566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1686813a1f2bSShri Abhyankar     nzi += nlnk;
1687813a1f2bSShri Abhyankar 
1688813a1f2bSShri Abhyankar     /* make sure diagonal entry is included */
1689813a1f2bSShri Abhyankar     if (diagonal_fill && lnk[i] == -1) {
1690813a1f2bSShri Abhyankar       fm = n;
1691813a1f2bSShri Abhyankar       while (lnk[fm] < i) fm = lnk[fm];
1692813a1f2bSShri Abhyankar       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1693813a1f2bSShri Abhyankar       lnk[fm]    = i;
1694813a1f2bSShri Abhyankar       lnk_lvl[i] = 0;
16959371c9d4SSatish Balay       nzi++;
16969371c9d4SSatish Balay       dcount++;
1697813a1f2bSShri Abhyankar     }
1698813a1f2bSShri Abhyankar 
1699813a1f2bSShri Abhyankar     /* add pivot rows into the active row */
1700813a1f2bSShri Abhyankar     nzbd = 0;
1701813a1f2bSShri Abhyankar     prow = lnk[n];
1702813a1f2bSShri Abhyankar     while (prow < i) {
1703813a1f2bSShri Abhyankar       nnz      = bdiag[prow];
1704813a1f2bSShri Abhyankar       cols     = bj_ptr[prow] + nnz + 1;
1705813a1f2bSShri Abhyankar       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1706813a1f2bSShri Abhyankar       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
17079566063dSJacob Faibussowitsch       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1708813a1f2bSShri Abhyankar       nzi += nlnk;
1709813a1f2bSShri Abhyankar       prow = lnk[prow];
1710813a1f2bSShri Abhyankar       nzbd++;
1711813a1f2bSShri Abhyankar     }
1712813a1f2bSShri Abhyankar     bdiag[i]  = nzbd;
1713813a1f2bSShri Abhyankar     bi[i + 1] = bi[i] + nzi;
1714813a1f2bSShri Abhyankar     /* if free space is not available, make more free space */
1715813a1f2bSShri Abhyankar     if (current_space->local_remaining < nzi) {
1716f91af8c7SBarry Smith       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
17179566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
17189566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1719813a1f2bSShri Abhyankar       reallocs++;
1720813a1f2bSShri Abhyankar     }
1721813a1f2bSShri Abhyankar 
1722813a1f2bSShri Abhyankar     /* copy data into free_space and free_space_lvl, then initialize lnk */
17239566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1724813a1f2bSShri Abhyankar     bj_ptr[i]    = current_space->array;
1725813a1f2bSShri Abhyankar     bjlvl_ptr[i] = current_space_lvl->array;
1726813a1f2bSShri Abhyankar 
1727813a1f2bSShri Abhyankar     /* make sure the active row i has diagonal entry */
172808401ef6SPierre Jolivet     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1729813a1f2bSShri Abhyankar 
1730813a1f2bSShri Abhyankar     current_space->array += nzi;
1731813a1f2bSShri Abhyankar     current_space->local_used += nzi;
1732813a1f2bSShri Abhyankar     current_space->local_remaining -= nzi;
1733813a1f2bSShri Abhyankar     current_space_lvl->array += nzi;
1734813a1f2bSShri Abhyankar     current_space_lvl->local_used += nzi;
1735813a1f2bSShri Abhyankar     current_space_lvl->local_remaining -= nzi;
1736813a1f2bSShri Abhyankar   }
1737813a1f2bSShri Abhyankar 
17389566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
17399566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
1740813a1f2bSShri Abhyankar   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
17419566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
17429566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1743813a1f2bSShri Abhyankar 
17449566063dSJacob Faibussowitsch   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
17459566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
17469566063dSJacob Faibussowitsch   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1747813a1f2bSShri Abhyankar 
1748813a1f2bSShri Abhyankar #if defined(PETSC_USE_INFO)
1749813a1f2bSShri Abhyankar   {
1750aef85c9fSShri Abhyankar     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
17519566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
17529566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
17539566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
17549566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "for best performance.\n"));
175548a46eb9SPierre Jolivet     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1756813a1f2bSShri Abhyankar   }
1757813a1f2bSShri Abhyankar #endif
1758813a1f2bSShri Abhyankar   /* put together the new matrix */
17599566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1760813a1f2bSShri Abhyankar   b = (Mat_SeqAIJ *)(fact)->data;
17612205254eSKarl Rupp 
1762813a1f2bSShri Abhyankar   b->free_a       = PETSC_TRUE;
1763813a1f2bSShri Abhyankar   b->free_ij      = PETSC_TRUE;
1764813a1f2bSShri Abhyankar   b->singlemalloc = PETSC_FALSE;
17652205254eSKarl Rupp 
17669566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
17672205254eSKarl Rupp 
1768813a1f2bSShri Abhyankar   b->j    = bj;
1769813a1f2bSShri Abhyankar   b->i    = bi;
1770813a1f2bSShri Abhyankar   b->diag = bdiag;
1771f4259b30SLisandro Dalcin   b->ilen = NULL;
1772f4259b30SLisandro Dalcin   b->imax = NULL;
1773813a1f2bSShri Abhyankar   b->row  = isrow;
1774813a1f2bSShri Abhyankar   b->col  = iscol;
17759566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
17769566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
1777813a1f2bSShri Abhyankar   b->icol = isicol;
17782205254eSKarl Rupp 
17799566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1780813a1f2bSShri Abhyankar   /* In b structure:  Free imax, ilen, old a, old j.
1781813a1f2bSShri Abhyankar      Allocate bdiag, solve_work, new a, new j */
1782f268cba8SShri Abhyankar   b->maxnz = b->nz = bdiag[0] + 1;
17832205254eSKarl Rupp 
1784813a1f2bSShri Abhyankar   (fact)->info.factor_mallocs    = reallocs;
1785813a1f2bSShri Abhyankar   (fact)->info.fill_ratio_given  = f;
1786f268cba8SShri Abhyankar   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
178735233d99SShri Abhyankar   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1788ad540459SPierre Jolivet   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
17899566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1790813a1f2bSShri Abhyankar   PetscFunctionReturn(0);
1791813a1f2bSShri Abhyankar }
1792813a1f2bSShri Abhyankar 
17939371c9d4SSatish Balay PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) {
17946516239fSHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
17956516239fSHong Zhang   IS                 isicol;
17966516239fSHong Zhang   const PetscInt    *r, *ic;
1797ece542f9SHong Zhang   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
17986516239fSHong Zhang   PetscInt          *bi, *cols, nnz, *cols_lvl;
17996516239fSHong Zhang   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
18006516239fSHong Zhang   PetscInt           i, levels, diagonal_fill;
1801ace3abfcSBarry Smith   PetscBool          col_identity, row_identity;
18026516239fSHong Zhang   PetscReal          f;
18030298fd71SBarry Smith   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
18046516239fSHong Zhang   PetscBT            lnkbt;
18056516239fSHong Zhang   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
18060298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
18070298fd71SBarry Smith   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1808ace3abfcSBarry Smith   PetscBool          missing;
18096516239fSHong Zhang 
18106516239fSHong Zhang   PetscFunctionBegin;
181108401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
18129566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
181328b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1814ece542f9SHong Zhang 
18156516239fSHong Zhang   f             = info->fill;
18166516239fSHong Zhang   levels        = (PetscInt)info->levels;
18176516239fSHong Zhang   diagonal_fill = (PetscInt)info->diagonal_fill;
18182205254eSKarl Rupp 
18199566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
18206516239fSHong Zhang 
18219566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
18229566063dSJacob Faibussowitsch   PetscCall(ISIdentity(iscol, &col_identity));
18236516239fSHong Zhang   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
18249566063dSJacob Faibussowitsch     PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE));
18252205254eSKarl Rupp 
1826ad04f41aSHong Zhang     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1827ad540459SPierre Jolivet     if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1828d5f3da31SBarry Smith     fact->factortype               = MAT_FACTOR_ILU;
1829719d5645SBarry Smith     (fact)->info.factor_mallocs    = 0;
1830719d5645SBarry Smith     (fact)->info.fill_ratio_given  = info->fill;
1831719d5645SBarry Smith     (fact)->info.fill_ratio_needed = 1.0;
18322205254eSKarl Rupp 
1833719d5645SBarry Smith     b       = (Mat_SeqAIJ *)(fact)->data;
183408480c60SBarry Smith     b->row  = isrow;
183508480c60SBarry Smith     b->col  = iscol;
183682bf6240SBarry Smith     b->icol = isicol;
18379566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work));
18389566063dSJacob Faibussowitsch     PetscCall(PetscObjectReference((PetscObject)isrow));
18399566063dSJacob Faibussowitsch     PetscCall(PetscObjectReference((PetscObject)iscol));
18403a40ed3dSBarry Smith     PetscFunctionReturn(0);
184108480c60SBarry Smith   }
184208480c60SBarry Smith 
18439566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
18449566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
18459e25ed09SBarry Smith 
18461bfa06eaSJed Brown   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
18479566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bi));
18489566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));
1849a6df321fSShri Abhyankar   bi[0] = bdiag[0] = 0;
18506cf997b0SBarry Smith 
18519566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
18525a8e39fbSHong Zhang 
18535a8e39fbSHong Zhang   /* create a linked list for storing column indices of the active row */
18545a8e39fbSHong Zhang   nlnk = n + 1;
18559566063dSJacob Faibussowitsch   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
18565a8e39fbSHong Zhang 
18575a8e39fbSHong Zhang   /* initial FreeSpace size is f*(ai[n]+1) */
18589566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
18595a8e39fbSHong Zhang   current_space = free_space;
18609566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
18615a8e39fbSHong Zhang   current_space_lvl = free_space_lvl;
18625a8e39fbSHong Zhang 
18635a8e39fbSHong Zhang   for (i = 0; i < n; i++) {
18645a8e39fbSHong Zhang     nzi = 0;
18656cf997b0SBarry Smith     /* copy current row into linked list */
18665a8e39fbSHong Zhang     nnz = ai[r[i] + 1] - ai[r[i]];
186728b400f6SJacob Faibussowitsch     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
18685a8e39fbSHong Zhang     cols   = aj + ai[r[i]];
18695a8e39fbSHong Zhang     lnk[i] = -1; /* marker to indicate if diagonal exists */
18709566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
18715a8e39fbSHong Zhang     nzi += nlnk;
18726cf997b0SBarry Smith 
18736cf997b0SBarry Smith     /* make sure diagonal entry is included */
18745a8e39fbSHong Zhang     if (diagonal_fill && lnk[i] == -1) {
18756cf997b0SBarry Smith       fm = n;
18765a8e39fbSHong Zhang       while (lnk[fm] < i) fm = lnk[fm];
18775a8e39fbSHong Zhang       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
18785a8e39fbSHong Zhang       lnk[fm]    = i;
18795a8e39fbSHong Zhang       lnk_lvl[i] = 0;
18809371c9d4SSatish Balay       nzi++;
18819371c9d4SSatish Balay       dcount++;
18826cf997b0SBarry Smith     }
18836cf997b0SBarry Smith 
18845a8e39fbSHong Zhang     /* add pivot rows into the active row */
18855a8e39fbSHong Zhang     nzbd = 0;
18865a8e39fbSHong Zhang     prow = lnk[n];
18875a8e39fbSHong Zhang     while (prow < i) {
18885a8e39fbSHong Zhang       nnz      = bdiag[prow];
18895a8e39fbSHong Zhang       cols     = bj_ptr[prow] + nnz + 1;
18905a8e39fbSHong Zhang       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
18915a8e39fbSHong Zhang       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
18929566063dSJacob Faibussowitsch       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
18935a8e39fbSHong Zhang       nzi += nlnk;
18945a8e39fbSHong Zhang       prow = lnk[prow];
18955a8e39fbSHong Zhang       nzbd++;
189656beaf04SBarry Smith     }
18975a8e39fbSHong Zhang     bdiag[i]  = nzbd;
18985a8e39fbSHong Zhang     bi[i + 1] = bi[i] + nzi;
1899ecf371e4SBarry Smith 
19005a8e39fbSHong Zhang     /* if free space is not available, make more free space */
19015a8e39fbSHong Zhang     if (current_space->local_remaining < nzi) {
1902f91af8c7SBarry Smith       nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
19039566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
19049566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
19055a8e39fbSHong Zhang       reallocs++;
19065a8e39fbSHong Zhang     }
1907ecf371e4SBarry Smith 
19085a8e39fbSHong Zhang     /* copy data into free_space and free_space_lvl, then initialize lnk */
19099566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
19105a8e39fbSHong Zhang     bj_ptr[i]    = current_space->array;
19115a8e39fbSHong Zhang     bjlvl_ptr[i] = current_space_lvl->array;
19125a8e39fbSHong Zhang 
19135a8e39fbSHong Zhang     /* make sure the active row i has diagonal entry */
191408401ef6SPierre Jolivet     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
19155a8e39fbSHong Zhang 
19165a8e39fbSHong Zhang     current_space->array += nzi;
19175a8e39fbSHong Zhang     current_space->local_used += nzi;
19185a8e39fbSHong Zhang     current_space->local_remaining -= nzi;
19195a8e39fbSHong Zhang     current_space_lvl->array += nzi;
19205a8e39fbSHong Zhang     current_space_lvl->local_used += nzi;
19215a8e39fbSHong Zhang     current_space_lvl->local_remaining -= nzi;
19229e25ed09SBarry Smith   }
19235a8e39fbSHong Zhang 
19249566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
19259566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
19265a8e39fbSHong Zhang 
19275a8e39fbSHong Zhang   /* destroy list of free space and other temporary arrays */
19289566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
19299566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */
19309566063dSJacob Faibussowitsch   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
19319566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
19329566063dSJacob Faibussowitsch   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
19339e25ed09SBarry Smith 
19346cf91177SBarry Smith #if defined(PETSC_USE_INFO)
1935f64065bbSBarry Smith   {
19365a8e39fbSHong Zhang     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
19379566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
19389566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
19399566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
19409566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "for best performance.\n"));
194148a46eb9SPierre Jolivet     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1942f64065bbSBarry Smith   }
194363ba0a88SBarry Smith #endif
1944bbb6d6a8SBarry Smith 
19459e25ed09SBarry Smith   /* put together the new matrix */
19469566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1947719d5645SBarry Smith   b = (Mat_SeqAIJ *)(fact)->data;
19482205254eSKarl Rupp 
1949e6b907acSBarry Smith   b->free_a       = PETSC_TRUE;
1950e6b907acSBarry Smith   b->free_ij      = PETSC_TRUE;
19517c922b88SBarry Smith   b->singlemalloc = PETSC_FALSE;
19522205254eSKarl Rupp 
19539566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[n], &b->a));
19545a8e39fbSHong Zhang   b->j = bj;
19555a8e39fbSHong Zhang   b->i = bi;
19565a8e39fbSHong Zhang   for (i = 0; i < n; i++) bdiag[i] += bi[i];
19575a8e39fbSHong Zhang   b->diag = bdiag;
1958f4259b30SLisandro Dalcin   b->ilen = NULL;
1959f4259b30SLisandro Dalcin   b->imax = NULL;
1960416022c9SBarry Smith   b->row  = isrow;
1961416022c9SBarry Smith   b->col  = iscol;
19629566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
19639566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
196482bf6240SBarry Smith   b->icol = isicol;
19659566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1966416022c9SBarry Smith   /* In b structure:  Free imax, ilen, old a, old j.
19675a8e39fbSHong Zhang      Allocate bdiag, solve_work, new a, new j */
19685a8e39fbSHong Zhang   b->maxnz = b->nz = bi[n];
19692205254eSKarl Rupp 
1970719d5645SBarry Smith   (fact)->info.factor_mallocs    = reallocs;
1971719d5645SBarry Smith   (fact)->info.fill_ratio_given  = f;
1972719d5645SBarry Smith   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
1973ad04f41aSHong Zhang   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ_inplace;
1974ad540459SPierre Jolivet   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
19753a40ed3dSBarry Smith   PetscFunctionReturn(0);
19769e25ed09SBarry Smith }
1977d5d45c9bSBarry Smith 
19789371c9d4SSatish Balay PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) {
19795f44c854SHong Zhang   Mat             C  = B;
19805f44c854SHong Zhang   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
19815f44c854SHong Zhang   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
19825f44c854SHong Zhang   IS              ip = b->row, iip = b->icol;
19835f44c854SHong Zhang   const PetscInt *rip, *riip;
19845f44c854SHong Zhang   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
19855f44c854SHong Zhang   PetscInt       *ai = a->i, *aj = a->j;
19865f44c854SHong Zhang   PetscInt        k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
19875f44c854SHong Zhang   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
1988ace3abfcSBarry Smith   PetscBool       perm_identity;
1989d90ac83dSHong Zhang   FactorShiftCtx  sctx;
199098a93161SHong Zhang   PetscReal       rs;
199198a93161SHong Zhang   MatScalar       d, *v;
199298a93161SHong Zhang 
19935f44c854SHong Zhang   PetscFunctionBegin;
199498a93161SHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
19959566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
199698a93161SHong Zhang 
1997f4db908eSBarry Smith   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
199898a93161SHong Zhang     sctx.shift_top = info->zeropivot;
199998a93161SHong Zhang     for (i = 0; i < mbs; i++) {
200098a93161SHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
200198a93161SHong Zhang       d  = (aa)[a->diag[i]];
200298a93161SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
200398a93161SHong Zhang       v  = aa + ai[i];
200498a93161SHong Zhang       nz = ai[i + 1] - ai[i];
20052205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
200698a93161SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
200798a93161SHong Zhang     }
200898a93161SHong Zhang     sctx.shift_top *= 1.1;
200998a93161SHong Zhang     sctx.nshift_max = 5;
201098a93161SHong Zhang     sctx.shift_lo   = 0.;
201198a93161SHong Zhang     sctx.shift_hi   = 1.;
201298a93161SHong Zhang   }
20135f44c854SHong Zhang 
20149566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(ip, &rip));
20159566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iip, &riip));
20165f44c854SHong Zhang 
20175f44c854SHong Zhang   /* allocate working arrays
20185f44c854SHong Zhang      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
20195f44c854SHong Zhang      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
20205f44c854SHong Zhang   */
20219566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
20225f44c854SHong Zhang 
202398a93161SHong Zhang   do {
202407b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
202598a93161SHong Zhang 
2026c5546cabSHong Zhang     for (i = 0; i < mbs; i++) c2r[i] = mbs;
20272e987568SSatish Balay     if (mbs) il[0] = 0;
20285f44c854SHong Zhang 
20295f44c854SHong Zhang     for (k = 0; k < mbs; k++) {
20305f44c854SHong Zhang       /* zero rtmp */
20315f44c854SHong Zhang       nz    = bi[k + 1] - bi[k];
20325f44c854SHong Zhang       bjtmp = bj + bi[k];
20335f44c854SHong Zhang       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
20345f44c854SHong Zhang 
20355f44c854SHong Zhang       /* load in initial unfactored row */
20365f44c854SHong Zhang       bval = ba + bi[k];
20379371c9d4SSatish Balay       jmin = ai[rip[k]];
20389371c9d4SSatish Balay       jmax = ai[rip[k] + 1];
20395f44c854SHong Zhang       for (j = jmin; j < jmax; j++) {
20405f44c854SHong Zhang         col = riip[aj[j]];
20415f44c854SHong Zhang         if (col >= k) { /* only take upper triangular entry */
20425f44c854SHong Zhang           rtmp[col] = aa[j];
20435f44c854SHong Zhang           *bval++   = 0.0; /* for in-place factorization */
20445f44c854SHong Zhang         }
20455f44c854SHong Zhang       }
204698a93161SHong Zhang       /* shift the diagonal of the matrix: ZeropivotApply() */
204798a93161SHong Zhang       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
20485f44c854SHong Zhang 
20495f44c854SHong Zhang       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
20505f44c854SHong Zhang       dk = rtmp[k];
20515f44c854SHong Zhang       i  = c2r[k]; /* first row to be added to k_th row  */
20525f44c854SHong Zhang 
20535f44c854SHong Zhang       while (i < k) {
20545f44c854SHong Zhang         nexti = c2r[i]; /* next row to be added to k_th row */
20555f44c854SHong Zhang 
20565f44c854SHong Zhang         /* compute multiplier, update diag(k) and U(i,k) */
20575f44c854SHong Zhang         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
20585f44c854SHong Zhang         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
20595f44c854SHong Zhang         dk += uikdi * ba[ili];           /* update diag[k] */
20605f44c854SHong Zhang         ba[ili] = uikdi;                 /* -U(i,k) */
20615f44c854SHong Zhang 
20625f44c854SHong Zhang         /* add multiple of row i to k-th row */
20639371c9d4SSatish Balay         jmin = ili + 1;
20649371c9d4SSatish Balay         jmax = bi[i + 1];
20655f44c854SHong Zhang         if (jmin < jmax) {
20665f44c854SHong Zhang           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
20675f44c854SHong Zhang           /* update il and c2r for row i */
20685f44c854SHong Zhang           il[i]  = jmin;
20699371c9d4SSatish Balay           j      = bj[jmin];
20709371c9d4SSatish Balay           c2r[i] = c2r[j];
20719371c9d4SSatish Balay           c2r[j] = i;
20725f44c854SHong Zhang         }
20735f44c854SHong Zhang         i = nexti;
20745f44c854SHong Zhang       }
20755f44c854SHong Zhang 
20765f44c854SHong Zhang       /* copy data into U(k,:) */
207798a93161SHong Zhang       rs   = 0.0;
20789371c9d4SSatish Balay       jmin = bi[k];
20799371c9d4SSatish Balay       jmax = bi[k + 1] - 1;
20805f44c854SHong Zhang       if (jmin < jmax) {
20815f44c854SHong Zhang         for (j = jmin; j < jmax; j++) {
20829371c9d4SSatish Balay           col   = bj[j];
20839371c9d4SSatish Balay           ba[j] = rtmp[col];
20849371c9d4SSatish Balay           rs += PetscAbsScalar(ba[j]);
20855f44c854SHong Zhang         }
20865f44c854SHong Zhang         /* add the k-th row into il and c2r */
20875f44c854SHong Zhang         il[k]  = jmin;
20889371c9d4SSatish Balay         i      = bj[jmin];
20899371c9d4SSatish Balay         c2r[k] = c2r[i];
20909371c9d4SSatish Balay         c2r[i] = k;
20915f44c854SHong Zhang       }
209298a93161SHong Zhang 
209398a93161SHong Zhang       /* MatPivotCheck() */
209498a93161SHong Zhang       sctx.rs = rs;
209598a93161SHong Zhang       sctx.pv = dk;
20969566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
209707b50cabSHong Zhang       if (sctx.newshift) break;
209898a93161SHong Zhang       dk = sctx.pv;
209998a93161SHong Zhang 
210098a93161SHong Zhang       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
210198a93161SHong Zhang     }
210207b50cabSHong Zhang   } while (sctx.newshift);
21035f44c854SHong Zhang 
21049566063dSJacob Faibussowitsch   PetscCall(PetscFree3(rtmp, il, c2r));
21059566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(ip, &rip));
21069566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iip, &riip));
21075f44c854SHong Zhang 
21089566063dSJacob Faibussowitsch   PetscCall(ISIdentity(ip, &perm_identity));
21095f44c854SHong Zhang   if (perm_identity) {
211035233d99SShri Abhyankar     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
211135233d99SShri Abhyankar     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
21122169352eSHong Zhang     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
21132169352eSHong Zhang     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
21145f44c854SHong Zhang   } else {
211535233d99SShri Abhyankar     B->ops->solve          = MatSolve_SeqSBAIJ_1;
211635233d99SShri Abhyankar     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
211735233d99SShri Abhyankar     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
211835233d99SShri Abhyankar     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
21195f44c854SHong Zhang   }
21205f44c854SHong Zhang 
21215f44c854SHong Zhang   C->assembled    = PETSC_TRUE;
21225f44c854SHong Zhang   C->preallocated = PETSC_TRUE;
21232205254eSKarl Rupp 
21249566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->rmap->n));
212598a93161SHong Zhang 
212698a93161SHong Zhang   /* MatPivotView() */
212798a93161SHong Zhang   if (sctx.nshift) {
2128f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
21299566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
2130f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
21319566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2132f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
21339566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
213498a93161SHong Zhang     }
213598a93161SHong Zhang   }
21365f44c854SHong Zhang   PetscFunctionReturn(0);
21375f44c854SHong Zhang }
21385f44c854SHong Zhang 
21399371c9d4SSatish Balay PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) {
2140719d5645SBarry Smith   Mat             C  = B;
21410968510aSHong Zhang   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
21422ed133dbSHong Zhang   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
21439bfd6278SHong Zhang   IS              ip = b->row, iip = b->icol;
21445d0c19d7SBarry Smith   const PetscInt *rip, *riip;
2145c5546cabSHong Zhang   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
21462ed133dbSHong Zhang   PetscInt       *ai = a->i, *aj = a->j;
21472ed133dbSHong Zhang   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2148622e2028SHong Zhang   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2149ace3abfcSBarry Smith   PetscBool       perm_identity;
21500e95ead3SHong Zhang   FactorShiftCtx  sctx;
21510e95ead3SHong Zhang   PetscReal       rs;
21520e95ead3SHong Zhang   MatScalar       d, *v;
2153d5d45c9bSBarry Smith 
2154a6175056SHong Zhang   PetscFunctionBegin;
21550e95ead3SHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
21569566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
21570e95ead3SHong Zhang 
21580e95ead3SHong Zhang   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
21590e95ead3SHong Zhang     sctx.shift_top = info->zeropivot;
21600e95ead3SHong Zhang     for (i = 0; i < mbs; i++) {
21610e95ead3SHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
21620e95ead3SHong Zhang       d  = (aa)[a->diag[i]];
21630e95ead3SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
21640e95ead3SHong Zhang       v  = aa + ai[i];
21650e95ead3SHong Zhang       nz = ai[i + 1] - ai[i];
21662205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
21670e95ead3SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
21680e95ead3SHong Zhang     }
21690e95ead3SHong Zhang     sctx.shift_top *= 1.1;
21700e95ead3SHong Zhang     sctx.nshift_max = 5;
21710e95ead3SHong Zhang     sctx.shift_lo   = 0.;
21720e95ead3SHong Zhang     sctx.shift_hi   = 1.;
21730e95ead3SHong Zhang   }
2174ee45ca4aSHong Zhang 
21759566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(ip, &rip));
21769566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iip, &riip));
21772ed133dbSHong Zhang 
21782ed133dbSHong Zhang   /* initialization */
21799566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
21800e95ead3SHong Zhang 
21812ed133dbSHong Zhang   do {
218207b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
21830e95ead3SHong Zhang 
2184c5546cabSHong Zhang     for (i = 0; i < mbs; i++) jl[i] = mbs;
2185c5546cabSHong Zhang     il[0] = 0;
21862ed133dbSHong Zhang 
21872ed133dbSHong Zhang     for (k = 0; k < mbs; k++) {
2188c5546cabSHong Zhang       /* zero rtmp */
2189c5546cabSHong Zhang       nz    = bi[k + 1] - bi[k];
2190c5546cabSHong Zhang       bjtmp = bj + bi[k];
2191c5546cabSHong Zhang       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2192c5546cabSHong Zhang 
2193c05c3958SHong Zhang       bval = ba + bi[k];
21942ed133dbSHong Zhang       /* initialize k-th row by the perm[k]-th row of A */
21959371c9d4SSatish Balay       jmin = ai[rip[k]];
21969371c9d4SSatish Balay       jmax = ai[rip[k] + 1];
21972ed133dbSHong Zhang       for (j = jmin; j < jmax; j++) {
21989bfd6278SHong Zhang         col = riip[aj[j]];
21992ed133dbSHong Zhang         if (col >= k) { /* only take upper triangular entry */
22002ed133dbSHong Zhang           rtmp[col] = aa[j];
2201c05c3958SHong Zhang           *bval++   = 0.0; /* for in-place factorization */
22022ed133dbSHong Zhang         }
22032ed133dbSHong Zhang       }
220439e3d630SHong Zhang       /* shift the diagonal of the matrix */
2205540703acSHong Zhang       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
22062ed133dbSHong Zhang 
22072ed133dbSHong Zhang       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
22082ed133dbSHong Zhang       dk = rtmp[k];
22092ed133dbSHong Zhang       i  = jl[k]; /* first row to be added to k_th row  */
22102ed133dbSHong Zhang 
22112ed133dbSHong Zhang       while (i < k) {
22122ed133dbSHong Zhang         nexti = jl[i]; /* next row to be added to k_th row */
22132ed133dbSHong Zhang 
22142ed133dbSHong Zhang         /* compute multiplier, update diag(k) and U(i,k) */
22152ed133dbSHong Zhang         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
22162ed133dbSHong Zhang         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
22172ed133dbSHong Zhang         dk += uikdi * ba[ili];
22182ed133dbSHong Zhang         ba[ili] = uikdi; /* -U(i,k) */
22192ed133dbSHong Zhang 
22202ed133dbSHong Zhang         /* add multiple of row i to k-th row */
22219371c9d4SSatish Balay         jmin = ili + 1;
22229371c9d4SSatish Balay         jmax = bi[i + 1];
22232ed133dbSHong Zhang         if (jmin < jmax) {
22242ed133dbSHong Zhang           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
22252ed133dbSHong Zhang           /* update il and jl for row i */
22262ed133dbSHong Zhang           il[i] = jmin;
22279371c9d4SSatish Balay           j     = bj[jmin];
22289371c9d4SSatish Balay           jl[i] = jl[j];
22299371c9d4SSatish Balay           jl[j] = i;
22302ed133dbSHong Zhang         }
22312ed133dbSHong Zhang         i = nexti;
22322ed133dbSHong Zhang       }
22332ed133dbSHong Zhang 
22340697b6caSHong Zhang       /* shift the diagonals when zero pivot is detected */
22350697b6caSHong Zhang       /* compute rs=sum of abs(off-diagonal) */
22360697b6caSHong Zhang       rs   = 0.0;
22373c9e8598SHong Zhang       jmin = bi[k] + 1;
22383c9e8598SHong Zhang       nz   = bi[k + 1] - jmin;
22393c9e8598SHong Zhang       bcol = bj + jmin;
2240ad540459SPierre Jolivet       for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);
2241540703acSHong Zhang 
2242540703acSHong Zhang       sctx.rs = rs;
2243540703acSHong Zhang       sctx.pv = dk;
22449566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
224507b50cabSHong Zhang       if (sctx.newshift) break;
22460e95ead3SHong Zhang       dk = sctx.pv;
22473c9e8598SHong Zhang 
22483c9e8598SHong Zhang       /* copy data into U(k,:) */
224939e3d630SHong Zhang       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
22509371c9d4SSatish Balay       jmin      = bi[k] + 1;
22519371c9d4SSatish Balay       jmax      = bi[k + 1];
225239e3d630SHong Zhang       if (jmin < jmax) {
225339e3d630SHong Zhang         for (j = jmin; j < jmax; j++) {
22549371c9d4SSatish Balay           col   = bj[j];
22559371c9d4SSatish Balay           ba[j] = rtmp[col];
22563c9e8598SHong Zhang         }
225739e3d630SHong Zhang         /* add the k-th row into il and jl */
22583c9e8598SHong Zhang         il[k] = jmin;
22599371c9d4SSatish Balay         i     = bj[jmin];
22609371c9d4SSatish Balay         jl[k] = jl[i];
22619371c9d4SSatish Balay         jl[i] = k;
22623c9e8598SHong Zhang       }
22633c9e8598SHong Zhang     }
226407b50cabSHong Zhang   } while (sctx.newshift);
22650e95ead3SHong Zhang 
22669566063dSJacob Faibussowitsch   PetscCall(PetscFree3(rtmp, il, jl));
22679566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(ip, &rip));
22689566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iip, &riip));
2269db4efbfdSBarry Smith 
22709566063dSJacob Faibussowitsch   PetscCall(ISIdentity(ip, &perm_identity));
2271db4efbfdSBarry Smith   if (perm_identity) {
22720a3c351aSHong Zhang     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
22730a3c351aSHong Zhang     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
22740a3c351aSHong Zhang     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
22750a3c351aSHong Zhang     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2276db4efbfdSBarry Smith   } else {
22770a3c351aSHong Zhang     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
22780a3c351aSHong Zhang     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
22790a3c351aSHong Zhang     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
22800a3c351aSHong Zhang     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2281db4efbfdSBarry Smith   }
2282db4efbfdSBarry Smith 
22833c9e8598SHong Zhang   C->assembled    = PETSC_TRUE;
22843c9e8598SHong Zhang   C->preallocated = PETSC_TRUE;
22852205254eSKarl Rupp 
22869566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->rmap->n));
2287540703acSHong Zhang   if (sctx.nshift) {
2288f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
22899566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2290f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
22919566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
22920697b6caSHong Zhang     }
22933c9e8598SHong Zhang   }
22943c9e8598SHong Zhang   PetscFunctionReturn(0);
22953c9e8598SHong Zhang }
2296a6175056SHong Zhang 
22978a76ed4fSHong Zhang /*
22988a76ed4fSHong Zhang    icc() under revised new data structure.
22998a76ed4fSHong Zhang    Factored arrays bj and ba are stored as
23008a76ed4fSHong Zhang      U(0,:),...,U(i,:),U(n-1,:)
23018a76ed4fSHong Zhang 
23028a76ed4fSHong Zhang    ui=fact->i is an array of size n+1, in which
23038a76ed4fSHong Zhang    ui+
23048a76ed4fSHong Zhang      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
23058a76ed4fSHong Zhang      ui[n]:  points to U(n-1,n-1)+1
23068a76ed4fSHong Zhang 
23078a76ed4fSHong Zhang   udiag=fact->diag is an array of size n,in which
23088a76ed4fSHong Zhang      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
23098a76ed4fSHong Zhang 
23108a76ed4fSHong Zhang    U(i,:) contains udiag[i] as its last entry, i.e.,
23118a76ed4fSHong Zhang     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
23128a76ed4fSHong Zhang */
23138a76ed4fSHong Zhang 
23149371c9d4SSatish Balay PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) {
23150968510aSHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2316ed59401aSHong Zhang   Mat_SeqSBAIJ      *b;
2317ace3abfcSBarry Smith   PetscBool          perm_identity, missing;
23185f44c854SHong Zhang   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
23195d0c19d7SBarry Smith   const PetscInt    *rip, *riip;
2320ed59401aSHong Zhang   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
23210298fd71SBarry Smith   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
23225a8e39fbSHong Zhang   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2323ed59401aSHong Zhang   PetscReal          fill = info->fill, levels = info->levels;
23240298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
23250298fd71SBarry Smith   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
23267a48dd6fSHong Zhang   PetscBT            lnkbt;
2327b635d36bSHong Zhang   IS                 iperm;
2328a6175056SHong Zhang 
2329a6175056SHong Zhang   PetscFunctionBegin;
233008401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
23319566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &d));
233228b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
23339566063dSJacob Faibussowitsch   PetscCall(ISIdentity(perm, &perm_identity));
23349566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
23357a48dd6fSHong Zhang 
23369566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &ui));
23379566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &udiag));
233839e3d630SHong Zhang   ui[0] = 0;
233939e3d630SHong Zhang 
23408a76ed4fSHong Zhang   /* ICC(0) without matrix ordering: simply rearrange column indices */
2341622e2028SHong Zhang   if (!levels && perm_identity) {
23425f44c854SHong Zhang     for (i = 0; i < am; i++) {
2343c5546cabSHong Zhang       ncols     = ai[i + 1] - a->diag[i];
2344c5546cabSHong Zhang       ui[i + 1] = ui[i] + ncols;
2345c5546cabSHong Zhang       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2346dc1e2a3fSHong Zhang     }
23479566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2348dc1e2a3fSHong Zhang     cols = uj;
2349dc1e2a3fSHong Zhang     for (i = 0; i < am; i++) {
23505f44c854SHong Zhang       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2351dc1e2a3fSHong Zhang       ncols = ai[i + 1] - a->diag[i] - 1;
23525f44c854SHong Zhang       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2353910cf402Sprj-       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
23545f44c854SHong Zhang     }
23555f44c854SHong Zhang   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
23569566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(iperm, &riip));
23579566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(perm, &rip));
23585f44c854SHong Zhang 
23595f44c854SHong Zhang     /* initialization */
23609566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(am + 1, &ajtmp));
23615f44c854SHong Zhang 
23625f44c854SHong Zhang     /* jl: linked list for storing indices of the pivot rows
23635f44c854SHong Zhang        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
23649566063dSJacob Faibussowitsch     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
23655f44c854SHong Zhang     for (i = 0; i < am; i++) {
23669371c9d4SSatish Balay       jl[i] = am;
23679371c9d4SSatish Balay       il[i] = 0;
23685f44c854SHong Zhang     }
23695f44c854SHong Zhang 
23705f44c854SHong Zhang     /* create and initialize a linked list for storing column indices of the active row k */
23715f44c854SHong Zhang     nlnk = am + 1;
23729566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
23735f44c854SHong Zhang 
237495b5ac22SHong Zhang     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
23759566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
23765f44c854SHong Zhang     current_space = free_space;
23779566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
23785f44c854SHong Zhang     current_space_lvl = free_space_lvl;
23795f44c854SHong Zhang 
23805f44c854SHong Zhang     for (k = 0; k < am; k++) { /* for each active row k */
23815f44c854SHong Zhang       /* initialize lnk by the column indices of row rip[k] of A */
23825f44c854SHong Zhang       nzk   = 0;
23835f44c854SHong Zhang       ncols = ai[rip[k] + 1] - ai[rip[k]];
238428b400f6SJacob Faibussowitsch       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
23855f44c854SHong Zhang       ncols_upper = 0;
23865f44c854SHong Zhang       for (j = 0; j < ncols; j++) {
23875f44c854SHong Zhang         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
23885f44c854SHong Zhang         if (riip[i] >= k) {         /* only take upper triangular entry */
23895f44c854SHong Zhang           ajtmp[ncols_upper] = i;
23905f44c854SHong Zhang           ncols_upper++;
23915f44c854SHong Zhang         }
23925f44c854SHong Zhang       }
23939566063dSJacob Faibussowitsch       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
23945f44c854SHong Zhang       nzk += nlnk;
23955f44c854SHong Zhang 
23965f44c854SHong Zhang       /* update lnk by computing fill-in for each pivot row to be merged in */
23975f44c854SHong Zhang       prow = jl[k]; /* 1st pivot row */
23985f44c854SHong Zhang 
23995f44c854SHong Zhang       while (prow < k) {
24005f44c854SHong Zhang         nextprow = jl[prow];
24015f44c854SHong Zhang 
24025f44c854SHong Zhang         /* merge prow into k-th row */
24035f44c854SHong Zhang         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
24045f44c854SHong Zhang         jmax  = ui[prow + 1];
24055f44c854SHong Zhang         ncols = jmax - jmin;
24065f44c854SHong Zhang         i     = jmin - ui[prow];
24075f44c854SHong Zhang         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
24085f44c854SHong Zhang         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
24095f44c854SHong Zhang         j     = *(uj - 1);
24109566063dSJacob Faibussowitsch         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
24115f44c854SHong Zhang         nzk += nlnk;
24125f44c854SHong Zhang 
24135f44c854SHong Zhang         /* update il and jl for prow */
24145f44c854SHong Zhang         if (jmin < jmax) {
24155f44c854SHong Zhang           il[prow] = jmin;
24169371c9d4SSatish Balay           j        = *cols;
24179371c9d4SSatish Balay           jl[prow] = jl[j];
24189371c9d4SSatish Balay           jl[j]    = prow;
24195f44c854SHong Zhang         }
24205f44c854SHong Zhang         prow = nextprow;
24215f44c854SHong Zhang       }
24225f44c854SHong Zhang 
24235f44c854SHong Zhang       /* if free space is not available, make more free space */
24245f44c854SHong Zhang       if (current_space->local_remaining < nzk) {
24255f44c854SHong Zhang         i = am - k + 1;                                    /* num of unfactored rows */
2426f91af8c7SBarry Smith         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
24279566063dSJacob Faibussowitsch         PetscCall(PetscFreeSpaceGet(i, &current_space));
24289566063dSJacob Faibussowitsch         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
24295f44c854SHong Zhang         reallocs++;
24305f44c854SHong Zhang       }
24315f44c854SHong Zhang 
24325f44c854SHong Zhang       /* copy data into free_space and free_space_lvl, then initialize lnk */
243308401ef6SPierre Jolivet       PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
24349566063dSJacob Faibussowitsch       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
24355f44c854SHong Zhang 
24365f44c854SHong Zhang       /* add the k-th row into il and jl */
24375f44c854SHong Zhang       if (nzk > 1) {
24385f44c854SHong Zhang         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
24399371c9d4SSatish Balay         jl[k] = jl[i];
24409371c9d4SSatish Balay         jl[i] = k;
24415f44c854SHong Zhang         il[k] = ui[k] + 1;
24425f44c854SHong Zhang       }
24435f44c854SHong Zhang       uj_ptr[k]     = current_space->array;
24445f44c854SHong Zhang       uj_lvl_ptr[k] = current_space_lvl->array;
24455f44c854SHong Zhang 
24465f44c854SHong Zhang       current_space->array += nzk;
24475f44c854SHong Zhang       current_space->local_used += nzk;
24485f44c854SHong Zhang       current_space->local_remaining -= nzk;
24495f44c854SHong Zhang 
24505f44c854SHong Zhang       current_space_lvl->array += nzk;
24515f44c854SHong Zhang       current_space_lvl->local_used += nzk;
24525f44c854SHong Zhang       current_space_lvl->local_remaining -= nzk;
24535f44c854SHong Zhang 
24545f44c854SHong Zhang       ui[k + 1] = ui[k] + nzk;
24555f44c854SHong Zhang     }
24565f44c854SHong Zhang 
24579566063dSJacob Faibussowitsch     PetscCall(ISRestoreIndices(perm, &rip));
24589566063dSJacob Faibussowitsch     PetscCall(ISRestoreIndices(iperm, &riip));
24599566063dSJacob Faibussowitsch     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
24609566063dSJacob Faibussowitsch     PetscCall(PetscFree(ajtmp));
24615f44c854SHong Zhang 
24629263d837SHong Zhang     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
24639566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
24649566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor  */
24659566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
24669566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
24675f44c854SHong Zhang 
24685f44c854SHong Zhang   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
24695f44c854SHong Zhang 
24705f44c854SHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
24715f44c854SHong Zhang   b               = (Mat_SeqSBAIJ *)(fact)->data;
24725f44c854SHong Zhang   b->singlemalloc = PETSC_FALSE;
24732205254eSKarl Rupp 
24749566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
24752205254eSKarl Rupp 
24765f44c854SHong Zhang   b->j         = uj;
24775f44c854SHong Zhang   b->i         = ui;
24785f44c854SHong Zhang   b->diag      = udiag;
24797f53bb6cSHong Zhang   b->free_diag = PETSC_TRUE;
2480f4259b30SLisandro Dalcin   b->ilen      = NULL;
2481f4259b30SLisandro Dalcin   b->imax      = NULL;
24825f44c854SHong Zhang   b->row       = perm;
24835f44c854SHong Zhang   b->col       = perm;
24849566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
24859566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
24865f44c854SHong Zhang   b->icol          = iperm;
24875f44c854SHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
24882205254eSKarl Rupp 
24899566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
24902205254eSKarl Rupp 
24915f44c854SHong Zhang   b->maxnz = b->nz = ui[am];
24925f44c854SHong Zhang   b->free_a        = PETSC_TRUE;
24935f44c854SHong Zhang   b->free_ij       = PETSC_TRUE;
24945f44c854SHong Zhang 
2495f284f12aSHong Zhang   fact->info.factor_mallocs   = reallocs;
2496f284f12aSHong Zhang   fact->info.fill_ratio_given = fill;
24975f44c854SHong Zhang   if (ai[am] != 0) {
24986a69fef8SHong Zhang     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
249995b5ac22SHong Zhang     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
25005f44c854SHong Zhang   } else {
2501f284f12aSHong Zhang     fact->info.fill_ratio_needed = 0.0;
25025f44c854SHong Zhang   }
25039263d837SHong Zhang #if defined(PETSC_USE_INFO)
25049263d837SHong Zhang   if (ai[am] != 0) {
25059263d837SHong Zhang     PetscReal af = fact->info.fill_ratio_needed;
25069566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
25079566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
25089566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
25099263d837SHong Zhang   } else {
25109566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
25119263d837SHong Zhang   }
25129263d837SHong Zhang #endif
251335233d99SShri Abhyankar   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
25145f44c854SHong Zhang   PetscFunctionReturn(0);
25155f44c854SHong Zhang }
25165f44c854SHong Zhang 
25179371c9d4SSatish Balay PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) {
25185f44c854SHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
25195f44c854SHong Zhang   Mat_SeqSBAIJ      *b;
2520ace3abfcSBarry Smith   PetscBool          perm_identity, missing;
25215f44c854SHong Zhang   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
25225f44c854SHong Zhang   const PetscInt    *rip, *riip;
25235f44c854SHong Zhang   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
25240298fd71SBarry Smith   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
25255f44c854SHong Zhang   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
25265f44c854SHong Zhang   PetscReal          fill = info->fill, levels = info->levels;
25270298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
25280298fd71SBarry Smith   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
25295f44c854SHong Zhang   PetscBT            lnkbt;
25305f44c854SHong Zhang   IS                 iperm;
25315f44c854SHong Zhang 
25325f44c854SHong Zhang   PetscFunctionBegin;
253308401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
25349566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &d));
253528b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
25369566063dSJacob Faibussowitsch   PetscCall(ISIdentity(perm, &perm_identity));
25379566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
25385f44c854SHong Zhang 
25399566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &ui));
25409566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &udiag));
25415f44c854SHong Zhang   ui[0] = 0;
25425f44c854SHong Zhang 
25435f44c854SHong Zhang   /* ICC(0) without matrix ordering: simply copies fill pattern */
25445f44c854SHong Zhang   if (!levels && perm_identity) {
25455f44c854SHong Zhang     for (i = 0; i < am; i++) {
25465f44c854SHong Zhang       ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
25475f44c854SHong Zhang       udiag[i]  = ui[i];
2548ed59401aSHong Zhang     }
25499566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
255039e3d630SHong Zhang     cols = uj;
2551ed59401aSHong Zhang     for (i = 0; i < am; i++) {
2552ed59401aSHong Zhang       aj    = a->j + a->diag[i];
255339e3d630SHong Zhang       ncols = ui[i + 1] - ui[i];
255439e3d630SHong Zhang       for (j = 0; j < ncols; j++) *cols++ = *aj++;
2555ed59401aSHong Zhang     }
255639e3d630SHong Zhang   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
25579566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(iperm, &riip));
25589566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(perm, &rip));
2559b635d36bSHong Zhang 
25607a48dd6fSHong Zhang     /* initialization */
25619566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(am + 1, &ajtmp));
25627a48dd6fSHong Zhang 
25637a48dd6fSHong Zhang     /* jl: linked list for storing indices of the pivot rows
25647a48dd6fSHong Zhang        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
25659566063dSJacob Faibussowitsch     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
25667a48dd6fSHong Zhang     for (i = 0; i < am; i++) {
25679371c9d4SSatish Balay       jl[i] = am;
25689371c9d4SSatish Balay       il[i] = 0;
25697a48dd6fSHong Zhang     }
25707a48dd6fSHong Zhang 
25717a48dd6fSHong Zhang     /* create and initialize a linked list for storing column indices of the active row k */
25727a48dd6fSHong Zhang     nlnk = am + 1;
25739566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
25747a48dd6fSHong Zhang 
25757a48dd6fSHong Zhang     /* initial FreeSpace size is fill*(ai[am]+1) */
25769566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
25777a48dd6fSHong Zhang     current_space = free_space;
25789566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl));
25797a48dd6fSHong Zhang     current_space_lvl = free_space_lvl;
25807a48dd6fSHong Zhang 
25817a48dd6fSHong Zhang     for (k = 0; k < am; k++) { /* for each active row k */
25827a48dd6fSHong Zhang       /* initialize lnk by the column indices of row rip[k] of A */
25837a48dd6fSHong Zhang       nzk   = 0;
2584622e2028SHong Zhang       ncols = ai[rip[k] + 1] - ai[rip[k]];
258528b400f6SJacob Faibussowitsch       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2586622e2028SHong Zhang       ncols_upper = 0;
2587622e2028SHong Zhang       for (j = 0; j < ncols; j++) {
2588b635d36bSHong Zhang         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2589b635d36bSHong Zhang         if (riip[i] >= k) {         /* only take upper triangular entry */
25905a8e39fbSHong Zhang           ajtmp[ncols_upper] = i;
2591622e2028SHong Zhang           ncols_upper++;
2592622e2028SHong Zhang         }
2593622e2028SHong Zhang       }
25949566063dSJacob Faibussowitsch       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
25957a48dd6fSHong Zhang       nzk += nlnk;
25967a48dd6fSHong Zhang 
25977a48dd6fSHong Zhang       /* update lnk by computing fill-in for each pivot row to be merged in */
25987a48dd6fSHong Zhang       prow = jl[k]; /* 1st pivot row */
25997a48dd6fSHong Zhang 
26007a48dd6fSHong Zhang       while (prow < k) {
26017a48dd6fSHong Zhang         nextprow = jl[prow];
26027a48dd6fSHong Zhang 
26037a48dd6fSHong Zhang         /* merge prow into k-th row */
26047a48dd6fSHong Zhang         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
26057a48dd6fSHong Zhang         jmax  = ui[prow + 1];
26067a48dd6fSHong Zhang         ncols = jmax - jmin;
2607ed59401aSHong Zhang         i     = jmin - ui[prow];
2608ed59401aSHong Zhang         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
26095a8e39fbSHong Zhang         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
26105a8e39fbSHong Zhang         j     = *(uj - 1);
26119566063dSJacob Faibussowitsch         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
26127a48dd6fSHong Zhang         nzk += nlnk;
26137a48dd6fSHong Zhang 
26147a48dd6fSHong Zhang         /* update il and jl for prow */
26157a48dd6fSHong Zhang         if (jmin < jmax) {
26167a48dd6fSHong Zhang           il[prow] = jmin;
26179371c9d4SSatish Balay           j        = *cols;
26189371c9d4SSatish Balay           jl[prow] = jl[j];
26199371c9d4SSatish Balay           jl[j]    = prow;
26207a48dd6fSHong Zhang         }
26217a48dd6fSHong Zhang         prow = nextprow;
26227a48dd6fSHong Zhang       }
26237a48dd6fSHong Zhang 
26247a48dd6fSHong Zhang       /* if free space is not available, make more free space */
26257a48dd6fSHong Zhang       if (current_space->local_remaining < nzk) {
26267a48dd6fSHong Zhang         i = am - k + 1;                                    /* num of unfactored rows */
2627f91af8c7SBarry Smith         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
26289566063dSJacob Faibussowitsch         PetscCall(PetscFreeSpaceGet(i, &current_space));
26299566063dSJacob Faibussowitsch         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
26307a48dd6fSHong Zhang         reallocs++;
26317a48dd6fSHong Zhang       }
26327a48dd6fSHong Zhang 
26337a48dd6fSHong Zhang       /* copy data into free_space and free_space_lvl, then initialize lnk */
263428b400f6SJacob Faibussowitsch       PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
26359566063dSJacob Faibussowitsch       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
26367a48dd6fSHong Zhang 
26377a48dd6fSHong Zhang       /* add the k-th row into il and jl */
263839e3d630SHong Zhang       if (nzk > 1) {
26397a48dd6fSHong Zhang         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
26409371c9d4SSatish Balay         jl[k] = jl[i];
26419371c9d4SSatish Balay         jl[i] = k;
26427a48dd6fSHong Zhang         il[k] = ui[k] + 1;
26437a48dd6fSHong Zhang       }
26447a48dd6fSHong Zhang       uj_ptr[k]     = current_space->array;
26457a48dd6fSHong Zhang       uj_lvl_ptr[k] = current_space_lvl->array;
26467a48dd6fSHong Zhang 
26477a48dd6fSHong Zhang       current_space->array += nzk;
26487a48dd6fSHong Zhang       current_space->local_used += nzk;
26497a48dd6fSHong Zhang       current_space->local_remaining -= nzk;
26507a48dd6fSHong Zhang 
26517a48dd6fSHong Zhang       current_space_lvl->array += nzk;
26527a48dd6fSHong Zhang       current_space_lvl->local_used += nzk;
26537a48dd6fSHong Zhang       current_space_lvl->local_remaining -= nzk;
26547a48dd6fSHong Zhang 
26557a48dd6fSHong Zhang       ui[k + 1] = ui[k] + nzk;
26567a48dd6fSHong Zhang     }
26577a48dd6fSHong Zhang 
26586cf91177SBarry Smith #if defined(PETSC_USE_INFO)
26597a48dd6fSHong Zhang     if (ai[am] != 0) {
266039e3d630SHong Zhang       PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
26619566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
26629566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
26639566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
26647a48dd6fSHong Zhang     } else {
26659566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "Empty matrix\n"));
26667a48dd6fSHong Zhang     }
266763ba0a88SBarry Smith #endif
26687a48dd6fSHong Zhang 
26699566063dSJacob Faibussowitsch     PetscCall(ISRestoreIndices(perm, &rip));
26709566063dSJacob Faibussowitsch     PetscCall(ISRestoreIndices(iperm, &riip));
26719566063dSJacob Faibussowitsch     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
26729566063dSJacob Faibussowitsch     PetscCall(PetscFree(ajtmp));
26737a48dd6fSHong Zhang 
26747a48dd6fSHong Zhang     /* destroy list of free space and other temporary array(s) */
26759566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
26769566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
26779566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
26789566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
26797a48dd6fSHong Zhang 
268039e3d630SHong Zhang   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
268139e3d630SHong Zhang 
26827a48dd6fSHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
26837a48dd6fSHong Zhang 
2684f284f12aSHong Zhang   b               = (Mat_SeqSBAIJ *)fact->data;
26857a48dd6fSHong Zhang   b->singlemalloc = PETSC_FALSE;
26862205254eSKarl Rupp 
26879566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
26882205254eSKarl Rupp 
26897a48dd6fSHong Zhang   b->j         = uj;
26907a48dd6fSHong Zhang   b->i         = ui;
26915f44c854SHong Zhang   b->diag      = udiag;
26927f53bb6cSHong Zhang   b->free_diag = PETSC_TRUE;
2693f4259b30SLisandro Dalcin   b->ilen      = NULL;
2694f4259b30SLisandro Dalcin   b->imax      = NULL;
26957a48dd6fSHong Zhang   b->row       = perm;
2696b635d36bSHong Zhang   b->col       = perm;
26972205254eSKarl Rupp 
26989566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
26999566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
27002205254eSKarl Rupp 
2701b635d36bSHong Zhang   b->icol          = iperm;
27027a48dd6fSHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
27039566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
27047a48dd6fSHong Zhang   b->maxnz = b->nz = ui[am];
2705e6b907acSBarry Smith   b->free_a        = PETSC_TRUE;
2706e6b907acSBarry Smith   b->free_ij       = PETSC_TRUE;
27077a48dd6fSHong Zhang 
2708f284f12aSHong Zhang   fact->info.factor_mallocs   = reallocs;
2709f284f12aSHong Zhang   fact->info.fill_ratio_given = fill;
27107a48dd6fSHong Zhang   if (ai[am] != 0) {
2711f284f12aSHong Zhang     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
27127a48dd6fSHong Zhang   } else {
2713f284f12aSHong Zhang     fact->info.fill_ratio_needed = 0.0;
27147a48dd6fSHong Zhang   }
27150a3c351aSHong Zhang   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2716a6175056SHong Zhang   PetscFunctionReturn(0);
2717a6175056SHong Zhang }
2718d5d45c9bSBarry Smith 
27199371c9d4SSatish Balay PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) {
27200be760fbSHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
27210be760fbSHong Zhang   Mat_SeqSBAIJ      *b;
27229186b105SHong Zhang   PetscBool          perm_identity, missing;
27230be760fbSHong Zhang   PetscReal          fill = info->fill;
27240be760fbSHong Zhang   const PetscInt    *rip, *riip;
27250be760fbSHong Zhang   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
27260be760fbSHong Zhang   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
27270be760fbSHong Zhang   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
27280298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
27290be760fbSHong Zhang   PetscBT            lnkbt;
27300be760fbSHong Zhang   IS                 iperm;
27310be760fbSHong Zhang 
27320be760fbSHong Zhang   PetscFunctionBegin;
273308401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
27349566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
273528b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
27369186b105SHong Zhang 
27370be760fbSHong Zhang   /* check whether perm is the identity mapping */
27389566063dSJacob Faibussowitsch   PetscCall(ISIdentity(perm, &perm_identity));
27399566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
27409566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iperm, &riip));
27419566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(perm, &rip));
27420be760fbSHong Zhang 
27430be760fbSHong Zhang   /* initialization */
27449566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &ui));
27459566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &udiag));
27460be760fbSHong Zhang   ui[0] = 0;
27470be760fbSHong Zhang 
27480be760fbSHong Zhang   /* jl: linked list for storing indices of the pivot rows
27490be760fbSHong Zhang      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
27509566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
27510be760fbSHong Zhang   for (i = 0; i < am; i++) {
27529371c9d4SSatish Balay     jl[i] = am;
27539371c9d4SSatish Balay     il[i] = 0;
27540be760fbSHong Zhang   }
27550be760fbSHong Zhang 
27560be760fbSHong Zhang   /* create and initialize a linked list for storing column indices of the active row k */
27570be760fbSHong Zhang   nlnk = am + 1;
27589566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
27590be760fbSHong Zhang 
276095b5ac22SHong Zhang   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
27619566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
27620be760fbSHong Zhang   current_space = free_space;
27630be760fbSHong Zhang 
27640be760fbSHong Zhang   for (k = 0; k < am; k++) { /* for each active row k */
27650be760fbSHong Zhang     /* initialize lnk by the column indices of row rip[k] of A */
27660be760fbSHong Zhang     nzk   = 0;
27670be760fbSHong Zhang     ncols = ai[rip[k] + 1] - ai[rip[k]];
276828b400f6SJacob Faibussowitsch     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
27690be760fbSHong Zhang     ncols_upper = 0;
27700be760fbSHong Zhang     for (j = 0; j < ncols; j++) {
27710be760fbSHong Zhang       i = riip[*(aj + ai[rip[k]] + j)];
27720be760fbSHong Zhang       if (i >= k) { /* only take upper triangular entry */
27730be760fbSHong Zhang         cols[ncols_upper] = i;
27740be760fbSHong Zhang         ncols_upper++;
27750be760fbSHong Zhang       }
27760be760fbSHong Zhang     }
27779566063dSJacob Faibussowitsch     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
27780be760fbSHong Zhang     nzk += nlnk;
27790be760fbSHong Zhang 
27800be760fbSHong Zhang     /* update lnk by computing fill-in for each pivot row to be merged in */
27810be760fbSHong Zhang     prow = jl[k]; /* 1st pivot row */
27820be760fbSHong Zhang 
27830be760fbSHong Zhang     while (prow < k) {
27840be760fbSHong Zhang       nextprow = jl[prow];
27850be760fbSHong Zhang       /* merge prow into k-th row */
27860be760fbSHong Zhang       jmin     = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
27870be760fbSHong Zhang       jmax     = ui[prow + 1];
27880be760fbSHong Zhang       ncols    = jmax - jmin;
27890be760fbSHong Zhang       uj_ptr   = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
27909566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
27910be760fbSHong Zhang       nzk += nlnk;
27920be760fbSHong Zhang 
27930be760fbSHong Zhang       /* update il and jl for prow */
27940be760fbSHong Zhang       if (jmin < jmax) {
27950be760fbSHong Zhang         il[prow] = jmin;
27962205254eSKarl Rupp         j        = *uj_ptr;
27972205254eSKarl Rupp         jl[prow] = jl[j];
27982205254eSKarl Rupp         jl[j]    = prow;
27990be760fbSHong Zhang       }
28000be760fbSHong Zhang       prow = nextprow;
28010be760fbSHong Zhang     }
28020be760fbSHong Zhang 
28030be760fbSHong Zhang     /* if free space is not available, make more free space */
28040be760fbSHong Zhang     if (current_space->local_remaining < nzk) {
28050be760fbSHong Zhang       i = am - k + 1;                                    /* num of unfactored rows */
2806f91af8c7SBarry Smith       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
28079566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(i, &current_space));
28080be760fbSHong Zhang       reallocs++;
28090be760fbSHong Zhang     }
28100be760fbSHong Zhang 
28110be760fbSHong Zhang     /* copy data into free space, then initialize lnk */
28129566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
28130be760fbSHong Zhang 
28140be760fbSHong Zhang     /* add the k-th row into il and jl */
28157b056e98SHong Zhang     if (nzk > 1) {
28160be760fbSHong Zhang       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
28179371c9d4SSatish Balay       jl[k] = jl[i];
28189371c9d4SSatish Balay       jl[i] = k;
28190be760fbSHong Zhang       il[k] = ui[k] + 1;
28200be760fbSHong Zhang     }
28210be760fbSHong Zhang     ui_ptr[k] = current_space->array;
28222205254eSKarl Rupp 
28230be760fbSHong Zhang     current_space->array += nzk;
28240be760fbSHong Zhang     current_space->local_used += nzk;
28250be760fbSHong Zhang     current_space->local_remaining -= nzk;
28260be760fbSHong Zhang 
28270be760fbSHong Zhang     ui[k + 1] = ui[k] + nzk;
28280be760fbSHong Zhang   }
28290be760fbSHong Zhang 
28309566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(perm, &rip));
28319566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iperm, &riip));
28329566063dSJacob Faibussowitsch   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
28330be760fbSHong Zhang 
28349263d837SHong Zhang   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
28359566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
28369566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
28379566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
28380be760fbSHong Zhang 
28390be760fbSHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
28400be760fbSHong Zhang 
2841f284f12aSHong Zhang   b               = (Mat_SeqSBAIJ *)fact->data;
28420be760fbSHong Zhang   b->singlemalloc = PETSC_FALSE;
28430be760fbSHong Zhang   b->free_a       = PETSC_TRUE;
28440be760fbSHong Zhang   b->free_ij      = PETSC_TRUE;
28452205254eSKarl Rupp 
28469566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
28472205254eSKarl Rupp 
28480be760fbSHong Zhang   b->j         = uj;
28490be760fbSHong Zhang   b->i         = ui;
28500be760fbSHong Zhang   b->diag      = udiag;
28510be760fbSHong Zhang   b->free_diag = PETSC_TRUE;
2852f4259b30SLisandro Dalcin   b->ilen      = NULL;
2853f4259b30SLisandro Dalcin   b->imax      = NULL;
28540be760fbSHong Zhang   b->row       = perm;
28550be760fbSHong Zhang   b->col       = perm;
285626fbe8dcSKarl Rupp 
28579566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
28589566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
28592205254eSKarl Rupp 
28600be760fbSHong Zhang   b->icol          = iperm;
28610be760fbSHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
286226fbe8dcSKarl Rupp 
28639566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
28642205254eSKarl Rupp 
28650be760fbSHong Zhang   b->maxnz = b->nz = ui[am];
28660be760fbSHong Zhang 
2867f284f12aSHong Zhang   fact->info.factor_mallocs   = reallocs;
2868f284f12aSHong Zhang   fact->info.fill_ratio_given = fill;
28690be760fbSHong Zhang   if (ai[am] != 0) {
28706a69fef8SHong Zhang     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
287195b5ac22SHong Zhang     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
28720be760fbSHong Zhang   } else {
2873f284f12aSHong Zhang     fact->info.fill_ratio_needed = 0.0;
28740be760fbSHong Zhang   }
28759263d837SHong Zhang #if defined(PETSC_USE_INFO)
28769263d837SHong Zhang   if (ai[am] != 0) {
28779263d837SHong Zhang     PetscReal af = fact->info.fill_ratio_needed;
28789566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
28799566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
28809566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
28819263d837SHong Zhang   } else {
28829566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
28839263d837SHong Zhang   }
28849263d837SHong Zhang #endif
288535233d99SShri Abhyankar   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
28860be760fbSHong Zhang   PetscFunctionReturn(0);
28870be760fbSHong Zhang }
28880be760fbSHong Zhang 
28899371c9d4SSatish Balay PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) {
2890f76d2b81SHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
289110c27e3dSHong Zhang   Mat_SeqSBAIJ      *b;
28929186b105SHong Zhang   PetscBool          perm_identity, missing;
289310c27e3dSHong Zhang   PetscReal          fill = info->fill;
28945d0c19d7SBarry Smith   const PetscInt    *rip, *riip;
28955d0c19d7SBarry Smith   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
289610c27e3dSHong Zhang   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
28972ed133dbSHong Zhang   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
28980298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
289910c27e3dSHong Zhang   PetscBT            lnkbt;
29002ed133dbSHong Zhang   IS                 iperm;
2901f76d2b81SHong Zhang 
2902f76d2b81SHong Zhang   PetscFunctionBegin;
290308401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
29049566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
290528b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
29069186b105SHong Zhang 
29072ed133dbSHong Zhang   /* check whether perm is the identity mapping */
29089566063dSJacob Faibussowitsch   PetscCall(ISIdentity(perm, &perm_identity));
29099566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
29109566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iperm, &riip));
29119566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(perm, &rip));
291210c27e3dSHong Zhang 
291310c27e3dSHong Zhang   /* initialization */
29149566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &ui));
291510c27e3dSHong Zhang   ui[0] = 0;
291610c27e3dSHong Zhang 
291710c27e3dSHong Zhang   /* jl: linked list for storing indices of the pivot rows
29181393bc97SHong Zhang      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
29199566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
29201393bc97SHong Zhang   for (i = 0; i < am; i++) {
29219371c9d4SSatish Balay     jl[i] = am;
29229371c9d4SSatish Balay     il[i] = 0;
2923f76d2b81SHong Zhang   }
292410c27e3dSHong Zhang 
292510c27e3dSHong Zhang   /* create and initialize a linked list for storing column indices of the active row k */
29261393bc97SHong Zhang   nlnk = am + 1;
29279566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
292810c27e3dSHong Zhang 
29291393bc97SHong Zhang   /* initial FreeSpace size is fill*(ai[am]+1) */
29309566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
293110c27e3dSHong Zhang   current_space = free_space;
293210c27e3dSHong Zhang 
29331393bc97SHong Zhang   for (k = 0; k < am; k++) { /* for each active row k */
293410c27e3dSHong Zhang     /* initialize lnk by the column indices of row rip[k] of A */
293510c27e3dSHong Zhang     nzk   = 0;
29362ed133dbSHong Zhang     ncols = ai[rip[k] + 1] - ai[rip[k]];
293794bad497SJacob Faibussowitsch     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
29382ed133dbSHong Zhang     ncols_upper = 0;
29392ed133dbSHong Zhang     for (j = 0; j < ncols; j++) {
29409bfd6278SHong Zhang       i = riip[*(aj + ai[rip[k]] + j)];
29412ed133dbSHong Zhang       if (i >= k) { /* only take upper triangular entry */
29422ed133dbSHong Zhang         cols[ncols_upper] = i;
29432ed133dbSHong Zhang         ncols_upper++;
29442ed133dbSHong Zhang       }
29452ed133dbSHong Zhang     }
29469566063dSJacob Faibussowitsch     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
294710c27e3dSHong Zhang     nzk += nlnk;
294810c27e3dSHong Zhang 
294910c27e3dSHong Zhang     /* update lnk by computing fill-in for each pivot row to be merged in */
295010c27e3dSHong Zhang     prow = jl[k]; /* 1st pivot row */
295110c27e3dSHong Zhang 
295210c27e3dSHong Zhang     while (prow < k) {
295310c27e3dSHong Zhang       nextprow = jl[prow];
295410c27e3dSHong Zhang       /* merge prow into k-th row */
29551393bc97SHong Zhang       jmin     = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
295610c27e3dSHong Zhang       jmax     = ui[prow + 1];
295710c27e3dSHong Zhang       ncols    = jmax - jmin;
29581393bc97SHong Zhang       uj_ptr   = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
29599566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
296010c27e3dSHong Zhang       nzk += nlnk;
296110c27e3dSHong Zhang 
296210c27e3dSHong Zhang       /* update il and jl for prow */
296310c27e3dSHong Zhang       if (jmin < jmax) {
296410c27e3dSHong Zhang         il[prow] = jmin;
29659371c9d4SSatish Balay         j        = *uj_ptr;
29669371c9d4SSatish Balay         jl[prow] = jl[j];
29679371c9d4SSatish Balay         jl[j]    = prow;
296810c27e3dSHong Zhang       }
296910c27e3dSHong Zhang       prow = nextprow;
297010c27e3dSHong Zhang     }
297110c27e3dSHong Zhang 
297210c27e3dSHong Zhang     /* if free space is not available, make more free space */
297310c27e3dSHong Zhang     if (current_space->local_remaining < nzk) {
29741393bc97SHong Zhang       i = am - k + 1;                     /* num of unfactored rows */
297510c27e3dSHong Zhang       i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
29769566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(i, &current_space));
297710c27e3dSHong Zhang       reallocs++;
297810c27e3dSHong Zhang     }
297910c27e3dSHong Zhang 
298010c27e3dSHong Zhang     /* copy data into free space, then initialize lnk */
29819566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
298210c27e3dSHong Zhang 
298310c27e3dSHong Zhang     /* add the k-th row into il and jl */
298410c27e3dSHong Zhang     if (nzk - 1 > 0) {
29851393bc97SHong Zhang       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
29869371c9d4SSatish Balay       jl[k] = jl[i];
29879371c9d4SSatish Balay       jl[i] = k;
298810c27e3dSHong Zhang       il[k] = ui[k] + 1;
298910c27e3dSHong Zhang     }
29902ed133dbSHong Zhang     ui_ptr[k] = current_space->array;
29912205254eSKarl Rupp 
299210c27e3dSHong Zhang     current_space->array += nzk;
299310c27e3dSHong Zhang     current_space->local_used += nzk;
299410c27e3dSHong Zhang     current_space->local_remaining -= nzk;
299510c27e3dSHong Zhang 
299610c27e3dSHong Zhang     ui[k + 1] = ui[k] + nzk;
299710c27e3dSHong Zhang   }
299810c27e3dSHong Zhang 
29996cf91177SBarry Smith #if defined(PETSC_USE_INFO)
30001393bc97SHong Zhang   if (ai[am] != 0) {
30011393bc97SHong Zhang     PetscReal af = (PetscReal)(ui[am]) / ((PetscReal)ai[am]);
30029566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
30039566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
30049566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
300510c27e3dSHong Zhang   } else {
30069566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
300710c27e3dSHong Zhang   }
300863ba0a88SBarry Smith #endif
300910c27e3dSHong Zhang 
30109566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(perm, &rip));
30119566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iperm, &riip));
30129566063dSJacob Faibussowitsch   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
301310c27e3dSHong Zhang 
301410c27e3dSHong Zhang   /* destroy list of free space and other temporary array(s) */
30159566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
30169566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
30179566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
301810c27e3dSHong Zhang 
301910c27e3dSHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
302010c27e3dSHong Zhang 
3021f284f12aSHong Zhang   b               = (Mat_SeqSBAIJ *)fact->data;
302210c27e3dSHong Zhang   b->singlemalloc = PETSC_FALSE;
3023e6b907acSBarry Smith   b->free_a       = PETSC_TRUE;
3024e6b907acSBarry Smith   b->free_ij      = PETSC_TRUE;
30252205254eSKarl Rupp 
30269566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
30272205254eSKarl Rupp 
302810c27e3dSHong Zhang   b->j    = uj;
302910c27e3dSHong Zhang   b->i    = ui;
3030f4259b30SLisandro Dalcin   b->diag = NULL;
3031f4259b30SLisandro Dalcin   b->ilen = NULL;
3032f4259b30SLisandro Dalcin   b->imax = NULL;
303310c27e3dSHong Zhang   b->row  = perm;
30349bfd6278SHong Zhang   b->col  = perm;
30352205254eSKarl Rupp 
30369566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
30379566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
30382205254eSKarl Rupp 
30399bfd6278SHong Zhang   b->icol          = iperm;
304010c27e3dSHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
30412205254eSKarl Rupp 
30429566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
30431393bc97SHong Zhang   b->maxnz = b->nz = ui[am];
304410c27e3dSHong Zhang 
3045f284f12aSHong Zhang   fact->info.factor_mallocs   = reallocs;
3046f284f12aSHong Zhang   fact->info.fill_ratio_given = fill;
30471393bc97SHong Zhang   if (ai[am] != 0) {
3048f284f12aSHong Zhang     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
304910c27e3dSHong Zhang   } else {
3050f284f12aSHong Zhang     fact->info.fill_ratio_needed = 0.0;
305110c27e3dSHong Zhang   }
30520a3c351aSHong Zhang   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3053f76d2b81SHong Zhang   PetscFunctionReturn(0);
3054f76d2b81SHong Zhang }
3055599ef60dSHong Zhang 
30569371c9d4SSatish Balay PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx) {
3057813a1f2bSShri Abhyankar   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
3058813a1f2bSShri Abhyankar   PetscInt           n  = A->rmap->n;
3059813a1f2bSShri Abhyankar   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3060813a1f2bSShri Abhyankar   PetscScalar       *x, sum;
3061813a1f2bSShri Abhyankar   const PetscScalar *b;
3062813a1f2bSShri Abhyankar   const MatScalar   *aa = a->a, *v;
3063813a1f2bSShri Abhyankar   PetscInt           i, nz;
3064813a1f2bSShri Abhyankar 
3065813a1f2bSShri Abhyankar   PetscFunctionBegin;
3066813a1f2bSShri Abhyankar   if (!n) PetscFunctionReturn(0);
3067813a1f2bSShri Abhyankar 
30689566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
30699566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
3070813a1f2bSShri Abhyankar 
3071813a1f2bSShri Abhyankar   /* forward solve the lower triangular */
3072813a1f2bSShri Abhyankar   x[0] = b[0];
3073813a1f2bSShri Abhyankar   v    = aa;
3074813a1f2bSShri Abhyankar   vi   = aj;
3075813a1f2bSShri Abhyankar   for (i = 1; i < n; i++) {
3076813a1f2bSShri Abhyankar     nz  = ai[i + 1] - ai[i];
3077813a1f2bSShri Abhyankar     sum = b[i];
3078813a1f2bSShri Abhyankar     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3079813a1f2bSShri Abhyankar     v += nz;
3080813a1f2bSShri Abhyankar     vi += nz;
3081813a1f2bSShri Abhyankar     x[i] = sum;
3082813a1f2bSShri Abhyankar   }
3083813a1f2bSShri Abhyankar 
3084813a1f2bSShri Abhyankar   /* backward solve the upper triangular */
308562fbd6afSShri Abhyankar   for (i = n - 1; i >= 0; i--) {
30863c0119dfSShri Abhyankar     v   = aa + adiag[i + 1] + 1;
30873c0119dfSShri Abhyankar     vi  = aj + adiag[i + 1] + 1;
3088813a1f2bSShri Abhyankar     nz  = adiag[i] - adiag[i + 1] - 1;
3089813a1f2bSShri Abhyankar     sum = x[i];
3090813a1f2bSShri Abhyankar     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
30913c0119dfSShri Abhyankar     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3092813a1f2bSShri Abhyankar   }
3093813a1f2bSShri Abhyankar 
30949566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
30959566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
30969566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
3097813a1f2bSShri Abhyankar   PetscFunctionReturn(0);
3098813a1f2bSShri Abhyankar }
3099813a1f2bSShri Abhyankar 
31009371c9d4SSatish Balay PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx) {
3101f268cba8SShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
3102f268cba8SShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
3103f268cba8SShri Abhyankar   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3104f268cba8SShri Abhyankar   const PetscInt    *rout, *cout, *r, *c;
3105f268cba8SShri Abhyankar   PetscScalar       *x, *tmp, sum;
3106f268cba8SShri Abhyankar   const PetscScalar *b;
3107f268cba8SShri Abhyankar   const MatScalar   *aa = a->a, *v;
3108f268cba8SShri Abhyankar 
3109f268cba8SShri Abhyankar   PetscFunctionBegin;
3110f268cba8SShri Abhyankar   if (!n) PetscFunctionReturn(0);
3111f268cba8SShri Abhyankar 
31129566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
31139566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
3114f268cba8SShri Abhyankar   tmp = a->solve_work;
3115f268cba8SShri Abhyankar 
31169371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
31179371c9d4SSatish Balay   r = rout;
31189371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
31199371c9d4SSatish Balay   c = cout;
3120f268cba8SShri Abhyankar 
3121f268cba8SShri Abhyankar   /* forward solve the lower triangular */
3122f268cba8SShri Abhyankar   tmp[0] = b[r[0]];
3123f268cba8SShri Abhyankar   v      = aa;
3124f268cba8SShri Abhyankar   vi     = aj;
3125f268cba8SShri Abhyankar   for (i = 1; i < n; i++) {
3126f268cba8SShri Abhyankar     nz  = ai[i + 1] - ai[i];
3127f268cba8SShri Abhyankar     sum = b[r[i]];
3128f268cba8SShri Abhyankar     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3129f268cba8SShri Abhyankar     tmp[i] = sum;
31309371c9d4SSatish Balay     v += nz;
31319371c9d4SSatish Balay     vi += nz;
3132f268cba8SShri Abhyankar   }
3133f268cba8SShri Abhyankar 
3134f268cba8SShri Abhyankar   /* backward solve the upper triangular */
3135f268cba8SShri Abhyankar   for (i = n - 1; i >= 0; i--) {
31363c0119dfSShri Abhyankar     v   = aa + adiag[i + 1] + 1;
31373c0119dfSShri Abhyankar     vi  = aj + adiag[i + 1] + 1;
3138f268cba8SShri Abhyankar     nz  = adiag[i] - adiag[i + 1] - 1;
3139f268cba8SShri Abhyankar     sum = tmp[i];
3140f268cba8SShri Abhyankar     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3141f268cba8SShri Abhyankar     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3142f268cba8SShri Abhyankar   }
3143f268cba8SShri Abhyankar 
31449566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
31459566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
31469566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
31479566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
31489566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3149f268cba8SShri Abhyankar   PetscFunctionReturn(0);
3150f268cba8SShri Abhyankar }
3151f268cba8SShri Abhyankar 
3152fe97e370SBarry Smith /*
31536aad120cSJose E. Roman     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3154fe97e370SBarry Smith */
31559371c9d4SSatish Balay PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact) {
31561d098868SHong Zhang   Mat             B = *fact;
3157599ef60dSHong Zhang   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b;
3158599ef60dSHong Zhang   IS              isicol;
31591d098868SHong Zhang   const PetscInt *r, *ic;
31601d098868SHong Zhang   PetscInt        i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3161dc3a2fd3SHong Zhang   PetscInt       *bi, *bj, *bdiag, *bdiag_rev;
3162f61a948aSLisandro Dalcin   PetscInt        row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
31631d098868SHong Zhang   PetscInt        nlnk, *lnk;
31641d098868SHong Zhang   PetscBT         lnkbt;
3165ace3abfcSBarry Smith   PetscBool       row_identity, icol_identity;
31668aee2decSHong Zhang   MatScalar      *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
31671d098868SHong Zhang   const PetscInt *ics;
31681d098868SHong Zhang   PetscInt        j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3169a2ea699eSBarry Smith   PetscReal       dt = info->dt, shift = info->shiftamount;
3170f61a948aSLisandro Dalcin   PetscInt        dtcount = (PetscInt)info->dtcount, nnz_max;
3171ace3abfcSBarry Smith   PetscBool       missing;
3172599ef60dSHong Zhang 
3173599ef60dSHong Zhang   PetscFunctionBegin;
3174f61a948aSLisandro Dalcin   if (dt == PETSC_DEFAULT) dt = 0.005;
3175f61a948aSLisandro Dalcin   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);
3176f61a948aSLisandro Dalcin 
31771d098868SHong Zhang   /* ------- symbolic factorization, can be reused ---------*/
31789566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
317928b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
31801d098868SHong Zhang   adiag = a->diag;
3181599ef60dSHong Zhang 
31829566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
3183599ef60dSHong Zhang 
3184599ef60dSHong Zhang   /* bdiag is location of diagonal in factor */
31859566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));     /* becomes b->diag */
31869566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */
3187599ef60dSHong Zhang 
31881d098868SHong Zhang   /* allocate row pointers bi */
31899566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * n + 2, &bi));
31901d098868SHong Zhang 
31911d098868SHong Zhang   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3192393d3a68SHong Zhang   if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
31931d098868SHong Zhang   nnz_max = ai[n] + 2 * n * dtcount + 2;
31948fc3a347SHong Zhang 
31959566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nnz_max + 1, &bj));
31969566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nnz_max + 1, &ba));
3197599ef60dSHong Zhang 
3198599ef60dSHong Zhang   /* put together the new matrix */
31999566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3200f284f12aSHong Zhang   b = (Mat_SeqAIJ *)B->data;
32012205254eSKarl Rupp 
3202599ef60dSHong Zhang   b->free_a       = PETSC_TRUE;
3203599ef60dSHong Zhang   b->free_ij      = PETSC_TRUE;
3204599ef60dSHong Zhang   b->singlemalloc = PETSC_FALSE;
32052205254eSKarl Rupp 
3206599ef60dSHong Zhang   b->a    = ba;
3207599ef60dSHong Zhang   b->j    = bj;
3208599ef60dSHong Zhang   b->i    = bi;
3209599ef60dSHong Zhang   b->diag = bdiag;
3210f4259b30SLisandro Dalcin   b->ilen = NULL;
3211f4259b30SLisandro Dalcin   b->imax = NULL;
3212599ef60dSHong Zhang   b->row  = isrow;
3213599ef60dSHong Zhang   b->col  = iscol;
32149566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
32159566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
3216599ef60dSHong Zhang   b->icol = isicol;
3217599ef60dSHong Zhang 
32189566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
32191d098868SHong Zhang   b->maxnz = nnz_max;
3220599ef60dSHong Zhang 
3221d5f3da31SBarry Smith   B->factortype            = MAT_FACTOR_ILUDT;
3222f284f12aSHong Zhang   B->info.factor_mallocs   = 0;
3223f284f12aSHong Zhang   B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
32241d098868SHong Zhang   /* ------- end of symbolic factorization ---------*/
3225599ef60dSHong Zhang 
32269566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
32279566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
3228599ef60dSHong Zhang   ics = ic;
3229599ef60dSHong Zhang 
3230599ef60dSHong Zhang   /* linked list for storing column indices of the active row */
3231599ef60dSHong Zhang   nlnk = n + 1;
32329566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
32331d098868SHong Zhang 
32341d098868SHong Zhang   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
32359566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n, &im, n, &jtmp));
32361d098868SHong Zhang   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
32379566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp));
32389566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(rtmp, n));
3239599ef60dSHong Zhang 
3240599ef60dSHong Zhang   bi[0]         = 0;
32418fc3a347SHong Zhang   bdiag[0]      = nnz_max - 1; /* location of diag[0] in factor B */
3242dc3a2fd3SHong Zhang   bdiag_rev[n]  = bdiag[0];
32438fc3a347SHong Zhang   bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3244599ef60dSHong Zhang   for (i = 0; i < n; i++) {
3245599ef60dSHong Zhang     /* copy initial fill into linked list */
32468369b28dSHong Zhang     nzi = ai[r[i] + 1] - ai[r[i]];
324794bad497SJacob Faibussowitsch     PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
32488369b28dSHong Zhang     nzi_al = adiag[r[i]] - ai[r[i]];
32498369b28dSHong Zhang     nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3250599ef60dSHong Zhang     ajtmp  = aj + ai[r[i]];
32519566063dSJacob Faibussowitsch     PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt));
3252599ef60dSHong Zhang 
3253599ef60dSHong Zhang     /* load in initial (unfactored row) */
32541d098868SHong Zhang     aatmp = a->a + ai[r[i]];
3255ad540459SPierre Jolivet     for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++;
3256599ef60dSHong Zhang 
3257599ef60dSHong Zhang     /* add pivot rows into linked list */
3258599ef60dSHong Zhang     row = lnk[n];
3259599ef60dSHong Zhang     while (row < i) {
32601d098868SHong Zhang       nzi_bl = bi[row + 1] - bi[row] + 1;
32611d098868SHong Zhang       bjtmp  = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
32629566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im));
3263599ef60dSHong Zhang       nzi += nlnk;
3264599ef60dSHong Zhang       row = lnk[row];
3265599ef60dSHong Zhang     }
3266599ef60dSHong Zhang 
32678369b28dSHong Zhang     /* copy data from lnk into jtmp, then initialize lnk */
32689566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt));
3269599ef60dSHong Zhang 
3270599ef60dSHong Zhang     /* numerical factorization */
32718369b28dSHong Zhang     bjtmp = jtmp;
3272599ef60dSHong Zhang     row   = *bjtmp++; /* 1st pivot row */
3273599ef60dSHong Zhang     while (row < i) {
3274599ef60dSHong Zhang       pc         = rtmp + row;
32753c2a7987SHong Zhang       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
32763c2a7987SHong Zhang       multiplier = (*pc) * (*pv);
3277599ef60dSHong Zhang       *pc        = multiplier;
32783c2a7987SHong Zhang       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
32791d098868SHong Zhang         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
32801d098868SHong Zhang         pv = ba + bdiag[row + 1] + 1;
32811d098868SHong Zhang         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
32821d098868SHong Zhang         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
32839566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3284599ef60dSHong Zhang       }
3285599ef60dSHong Zhang       row = *bjtmp++;
3286599ef60dSHong Zhang     }
3287599ef60dSHong Zhang 
32888369b28dSHong Zhang     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
32898aee2decSHong Zhang     diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
32909371c9d4SSatish Balay     nzi_bl   = 0;
32919371c9d4SSatish Balay     j        = 0;
32928369b28dSHong Zhang     while (jtmp[j] < i) { /* Note: jtmp is sorted */
32939371c9d4SSatish Balay       vtmp[j]       = rtmp[jtmp[j]];
32949371c9d4SSatish Balay       rtmp[jtmp[j]] = 0.0;
32959371c9d4SSatish Balay       nzi_bl++;
32969371c9d4SSatish Balay       j++;
32978369b28dSHong Zhang     }
32988369b28dSHong Zhang     nzi_bu = nzi - nzi_bl - 1;
32998369b28dSHong Zhang     while (j < nzi) {
33009371c9d4SSatish Balay       vtmp[j]       = rtmp[jtmp[j]];
33019371c9d4SSatish Balay       rtmp[jtmp[j]] = 0.0;
33028369b28dSHong Zhang       j++;
33038369b28dSHong Zhang     }
33048369b28dSHong Zhang 
3305599ef60dSHong Zhang     bjtmp = bj + bi[i];
3306599ef60dSHong Zhang     batmp = ba + bi[i];
33078369b28dSHong Zhang     /* apply level dropping rule to L part */
33088369b28dSHong Zhang     ncut  = nzi_al + dtcount;
33098369b28dSHong Zhang     if (ncut < nzi_bl) {
33109566063dSJacob Faibussowitsch       PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp));
33119566063dSJacob Faibussowitsch       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp));
3312599ef60dSHong Zhang     } else {
33138369b28dSHong Zhang       ncut = nzi_bl;
33148369b28dSHong Zhang     }
33158369b28dSHong Zhang     for (j = 0; j < ncut; j++) {
33168369b28dSHong Zhang       bjtmp[j] = jtmp[j];
33178369b28dSHong Zhang       batmp[j] = vtmp[j];
33188369b28dSHong Zhang     }
33191d098868SHong Zhang     bi[i + 1] = bi[i] + ncut;
33208369b28dSHong Zhang     nzi       = ncut + 1;
33218369b28dSHong Zhang 
33228369b28dSHong Zhang     /* apply level dropping rule to U part */
33238369b28dSHong Zhang     ncut = nzi_au + dtcount;
33248369b28dSHong Zhang     if (ncut < nzi_bu) {
33259566063dSJacob Faibussowitsch       PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1));
33269566063dSJacob Faibussowitsch       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1));
33278369b28dSHong Zhang     } else {
33288369b28dSHong Zhang       ncut = nzi_bu;
33298369b28dSHong Zhang     }
33308369b28dSHong Zhang     nzi += ncut;
33311d098868SHong Zhang 
33321d098868SHong Zhang     /* mark bdiagonal */
33331d098868SHong Zhang     bdiag[i + 1]         = bdiag[i] - (ncut + 1);
3334dc3a2fd3SHong Zhang     bdiag_rev[n - i - 1] = bdiag[i + 1];
33358fc3a347SHong Zhang     bi[2 * n - i]        = bi[2 * n - i + 1] - (ncut + 1);
33361d098868SHong Zhang     bjtmp                = bj + bdiag[i];
33371d098868SHong Zhang     batmp                = ba + bdiag[i];
33381d098868SHong Zhang     *bjtmp               = i;
33398aee2decSHong Zhang     *batmp               = diag_tmp; /* rtmp[i]; */
3340ad540459SPierre Jolivet     if (*batmp == 0.0) *batmp = dt + shift;
3341a5b23f4aSJose E. Roman     *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */
33421d098868SHong Zhang 
33431d098868SHong Zhang     bjtmp = bj + bdiag[i + 1] + 1;
33441d098868SHong Zhang     batmp = ba + bdiag[i + 1] + 1;
33458369b28dSHong Zhang     for (k = 0; k < ncut; k++) {
33461d098868SHong Zhang       bjtmp[k] = jtmp[nzi_bl + 1 + k];
33471d098868SHong Zhang       batmp[k] = vtmp[nzi_bl + 1 + k];
3348599ef60dSHong Zhang     }
3349599ef60dSHong Zhang 
3350599ef60dSHong Zhang     im[i] = nzi; /* used by PetscLLAddSortedLU() */
33518369b28dSHong Zhang   }              /* for (i=0; i<n; i++) */
335263a3b9bcSJacob Faibussowitsch   PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]);
3353599ef60dSHong Zhang 
33549566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
33559566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
3356599ef60dSHong Zhang 
33579566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
33589566063dSJacob Faibussowitsch   PetscCall(PetscFree2(im, jtmp));
33599566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rtmp, vtmp));
33609566063dSJacob Faibussowitsch   PetscCall(PetscFree(bdiag_rev));
3361599ef60dSHong Zhang 
33629566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(B->cmap->n));
33631d098868SHong Zhang   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3364599ef60dSHong Zhang 
33659566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
33669566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isicol, &icol_identity));
3367599ef60dSHong Zhang   if (row_identity && icol_identity) {
336835233d99SShri Abhyankar     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3369599ef60dSHong Zhang   } else {
337035233d99SShri Abhyankar     B->ops->solve = MatSolve_SeqAIJ;
3371599ef60dSHong Zhang   }
3372599ef60dSHong Zhang 
3373f4259b30SLisandro Dalcin   B->ops->solveadd          = NULL;
3374f4259b30SLisandro Dalcin   B->ops->solvetranspose    = NULL;
3375f4259b30SLisandro Dalcin   B->ops->solvetransposeadd = NULL;
3376f4259b30SLisandro Dalcin   B->ops->matsolve          = NULL;
3377599ef60dSHong Zhang   B->assembled              = PETSC_TRUE;
3378599ef60dSHong Zhang   B->preallocated           = PETSC_TRUE;
3379599ef60dSHong Zhang   PetscFunctionReturn(0);
3380599ef60dSHong Zhang }
3381599ef60dSHong Zhang 
33823c2a7987SHong Zhang /* a wraper of MatILUDTFactor_SeqAIJ() */
3383fe97e370SBarry Smith /*
33846aad120cSJose E. Roman     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3385fe97e370SBarry Smith */
3386fe97e370SBarry Smith 
33879371c9d4SSatish Balay PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) {
33883c2a7987SHong Zhang   PetscFunctionBegin;
33899566063dSJacob Faibussowitsch   PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact));
33903c2a7987SHong Zhang   PetscFunctionReturn(0);
33913c2a7987SHong Zhang }
33923c2a7987SHong Zhang 
33933c2a7987SHong Zhang /*
33943c2a7987SHong Zhang    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
33953c2a7987SHong Zhang    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
33963c2a7987SHong Zhang */
3397fe97e370SBarry Smith /*
33986aad120cSJose E. Roman     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3399fe97e370SBarry Smith */
3400fe97e370SBarry Smith 
34019371c9d4SSatish Balay PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info) {
34023c2a7987SHong Zhang   Mat             C = fact;
34033c2a7987SHong Zhang   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
34043c2a7987SHong Zhang   IS              isrow = b->row, isicol = b->icol;
34053c2a7987SHong Zhang   const PetscInt *r, *ic, *ics;
3406b78a477dSHong Zhang   PetscInt        i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3407b78a477dSHong Zhang   PetscInt       *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
34083c2a7987SHong Zhang   MatScalar      *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3409182b8fbaSHong Zhang   PetscReal       dt = info->dt, shift = info->shiftamount;
3410ace3abfcSBarry Smith   PetscBool       row_identity, col_identity;
34113c2a7987SHong Zhang 
34123c2a7987SHong Zhang   PetscFunctionBegin;
34139566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
34149566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
34159566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &rtmp));
34163c2a7987SHong Zhang   ics = ic;
34173c2a7987SHong Zhang 
34183c2a7987SHong Zhang   for (i = 0; i < n; i++) {
3419b78a477dSHong Zhang     /* initialize rtmp array */
3420b78a477dSHong Zhang     nzl   = bi[i + 1] - bi[i]; /* num of nozeros in L(i,:) */
34213c2a7987SHong Zhang     bjtmp = bj + bi[i];
3422b78a477dSHong Zhang     for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3423b78a477dSHong Zhang     rtmp[i] = 0.0;
3424b78a477dSHong Zhang     nzu     = bdiag[i] - bdiag[i + 1]; /* num of nozeros in U(i,:) */
3425b78a477dSHong Zhang     bjtmp   = bj + bdiag[i + 1] + 1;
3426b78a477dSHong Zhang     for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;
34273c2a7987SHong Zhang 
3428b78a477dSHong Zhang     /* load in initial unfactored row of A */
34293c2a7987SHong Zhang     nz    = ai[r[i] + 1] - ai[r[i]];
34303c2a7987SHong Zhang     ajtmp = aj + ai[r[i]];
34313c2a7987SHong Zhang     v     = aa + ai[r[i]];
3432ad540459SPierre Jolivet     for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j];
34333c2a7987SHong Zhang 
3434b78a477dSHong Zhang     /* numerical factorization */
3435b78a477dSHong Zhang     bjtmp = bj + bi[i];        /* point to 1st entry of L(i,:) */
3436b78a477dSHong Zhang     nzl   = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3437b78a477dSHong Zhang     k     = 0;
3438b78a477dSHong Zhang     while (k < nzl) {
34393c2a7987SHong Zhang       row        = *bjtmp++;
34403c2a7987SHong Zhang       pc         = rtmp + row;
3441b78a477dSHong Zhang       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3442b78a477dSHong Zhang       multiplier = (*pc) * (*pv);
34433c2a7987SHong Zhang       *pc        = multiplier;
3444b78a477dSHong Zhang       if (PetscAbsScalar(multiplier) > dt) {
3445b78a477dSHong Zhang         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3446b78a477dSHong Zhang         pv = b->a + bdiag[row + 1] + 1;
3447b78a477dSHong Zhang         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3448b78a477dSHong Zhang         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
34499566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(1 + 2.0 * nz));
34503c2a7987SHong Zhang       }
3451b78a477dSHong Zhang       k++;
34523c2a7987SHong Zhang     }
34533c2a7987SHong Zhang 
3454b78a477dSHong Zhang     /* finished row so stick it into b->a */
3455b78a477dSHong Zhang     /* L-part */
3456b78a477dSHong Zhang     pv  = b->a + bi[i];
3457b78a477dSHong Zhang     pj  = bj + bi[i];
3458b78a477dSHong Zhang     nzl = bi[i + 1] - bi[i];
3459ad540459SPierre Jolivet     for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]];
3460b78a477dSHong Zhang 
3461a5b23f4aSJose E. Roman     /* diagonal: invert diagonal entries for simpler triangular solves */
3462b78a477dSHong Zhang     if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3463b78a477dSHong Zhang     b->a[bdiag[i]] = 1.0 / rtmp[i];
3464b78a477dSHong Zhang 
3465b78a477dSHong Zhang     /* U-part */
3466b78a477dSHong Zhang     pv  = b->a + bdiag[i + 1] + 1;
3467b78a477dSHong Zhang     pj  = bj + bdiag[i + 1] + 1;
3468b78a477dSHong Zhang     nzu = bdiag[i] - bdiag[i + 1] - 1;
3469ad540459SPierre Jolivet     for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]];
3470b78a477dSHong Zhang   }
3471b78a477dSHong Zhang 
34729566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
34739566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
34749566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
3475b78a477dSHong Zhang 
34769566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
34779566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isicol, &col_identity));
34783c2a7987SHong Zhang   if (row_identity && col_identity) {
347935233d99SShri Abhyankar     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
34803c2a7987SHong Zhang   } else {
348135233d99SShri Abhyankar     C->ops->solve = MatSolve_SeqAIJ;
34823c2a7987SHong Zhang   }
3483f4259b30SLisandro Dalcin   C->ops->solveadd          = NULL;
3484f4259b30SLisandro Dalcin   C->ops->solvetranspose    = NULL;
3485f4259b30SLisandro Dalcin   C->ops->solvetransposeadd = NULL;
3486f4259b30SLisandro Dalcin   C->ops->matsolve          = NULL;
34873c2a7987SHong Zhang   C->assembled              = PETSC_TRUE;
34883c2a7987SHong Zhang   C->preallocated           = PETSC_TRUE;
34892205254eSKarl Rupp 
34909566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->cmap->n));
34913c2a7987SHong Zhang   PetscFunctionReturn(0);
34923c2a7987SHong Zhang }
3493