xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision bbea24aa56d87d3065f0603396f6e854e33a88f9)
1c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>
2c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h>
3c6db04a5SJed Brown #include <petscbt.h>
4c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
53b2fbd54SBarry Smith 
68fa24674SBarry Smith /*
78fa24674SBarry Smith       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
8312e372aSBarry Smith 
9312e372aSBarry Smith       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
108fa24674SBarry Smith */
11ff6a9541SJacob Faibussowitsch #if 0
12d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol)
13d71ae5a4SJacob Faibussowitsch {
148fa24674SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)mat->data;
15889a8dedSBarry Smith   PetscInt           i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
168fa24674SBarry Smith   const PetscInt    *ai = a->i, *aj = a->j;
178fa24674SBarry Smith   const PetscScalar *aa = a->a;
18ace3abfcSBarry Smith   PetscBool         *done;
19889a8dedSBarry Smith   PetscReal          best, past = 0, future;
203a40ed3dSBarry Smith 
218fa24674SBarry Smith   PetscFunctionBegin;
228fa24674SBarry Smith   /* pick initial row */
238fa24674SBarry Smith   best = -1;
248fa24674SBarry Smith   for (i = 0; i < n; i++) {
2575567043SBarry Smith     future = 0.0;
268fa24674SBarry Smith     for (j = ai[i]; j < ai[i + 1]; j++) {
27db4deed7SKarl Rupp       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
28db4deed7SKarl Rupp       else past = PetscAbsScalar(aa[j]);
298fa24674SBarry Smith     }
308fa24674SBarry Smith     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
318fa24674SBarry Smith     if (past / future > best) {
328fa24674SBarry Smith       best    = past / future;
338fa24674SBarry Smith       current = i;
348fa24674SBarry Smith     }
358fa24674SBarry Smith   }
368fa24674SBarry Smith 
379566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &done));
389566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(done, n));
399566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &order));
408fa24674SBarry Smith   order[0] = current;
418fa24674SBarry Smith   for (i = 0; i < n - 1; i++) {
428fa24674SBarry Smith     done[current] = PETSC_TRUE;
438fa24674SBarry Smith     best          = -1;
448fa24674SBarry Smith     /* loop over all neighbors of current pivot */
458fa24674SBarry Smith     for (j = ai[current]; j < ai[current + 1]; j++) {
468fa24674SBarry Smith       jj = aj[j];
478fa24674SBarry Smith       if (done[jj]) continue;
488fa24674SBarry Smith       /* loop over columns of potential next row computing weights for below and above diagonal */
498fa24674SBarry Smith       past = future = 0.0;
508fa24674SBarry Smith       for (k = ai[jj]; k < ai[jj + 1]; k++) {
518fa24674SBarry Smith         kk = aj[k];
528fa24674SBarry Smith         if (done[kk]) past += PetscAbsScalar(aa[k]);
538fa24674SBarry Smith         else if (kk != jj) future += PetscAbsScalar(aa[k]);
548fa24674SBarry Smith       }
558fa24674SBarry Smith       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
568fa24674SBarry Smith       if (past / future > best) {
578fa24674SBarry Smith         best       = past / future;
588fa24674SBarry Smith         newcurrent = jj;
598fa24674SBarry Smith       }
608fa24674SBarry Smith     }
618fa24674SBarry Smith     if (best == -1) { /* no neighbors to select from so select best of all that remain */
628fa24674SBarry Smith       best = -1;
638fa24674SBarry Smith       for (k = 0; k < n; k++) {
648fa24674SBarry Smith         if (done[k]) continue;
6575567043SBarry Smith         future = 0.0;
6675567043SBarry Smith         past   = 0.0;
678fa24674SBarry Smith         for (j = ai[k]; j < ai[k + 1]; j++) {
688fa24674SBarry Smith           kk = aj[j];
698fa24674SBarry Smith           if (done[kk]) past += PetscAbsScalar(aa[j]);
708fa24674SBarry Smith           else if (kk != k) future += PetscAbsScalar(aa[j]);
718fa24674SBarry Smith         }
728fa24674SBarry Smith         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
738fa24674SBarry Smith         if (past / future > best) {
748fa24674SBarry Smith           best       = past / future;
758fa24674SBarry Smith           newcurrent = k;
768fa24674SBarry Smith         }
778fa24674SBarry Smith       }
788fa24674SBarry Smith     }
7908401ef6SPierre Jolivet     PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current");
808fa24674SBarry Smith     current      = newcurrent;
818fa24674SBarry Smith     order[i + 1] = current;
828fa24674SBarry Smith   }
839566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow));
848fa24674SBarry Smith   *icol = *irow;
859566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)*irow));
869566063dSJacob Faibussowitsch   PetscCall(PetscFree(done));
879566063dSJacob Faibussowitsch   PetscCall(PetscFree(order));
883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
893b2fbd54SBarry Smith }
90ff6a9541SJacob Faibussowitsch #endif
913b2fbd54SBarry Smith 
92d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
93d71ae5a4SJacob Faibussowitsch {
944ac6704cSBarry Smith   PetscFunctionBegin;
954ac6704cSBarry Smith   *type = MATSOLVERPETSC;
963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
974ac6704cSBarry Smith }
984ac6704cSBarry Smith 
99d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
100d71ae5a4SJacob Faibussowitsch {
101d0f46423SBarry Smith   PetscInt n = A->rmap->n;
102b24902e0SBarry Smith 
103b24902e0SBarry Smith   PetscFunctionBegin;
104891bceaeSHong Zhang #if defined(PETSC_USE_COMPLEX)
10503e5aca4SStefano Zampini   if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
10603e5aca4SStefano Zampini     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n"));
10703e5aca4SStefano Zampini     *B = NULL;
10803e5aca4SStefano Zampini     PetscFunctionReturn(PETSC_SUCCESS);
10903e5aca4SStefano Zampini   }
110891bceaeSHong Zhang #endif
11103e5aca4SStefano Zampini 
1129566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
1139566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*B, n, n, n, n));
114599ef60dSHong Zhang   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
1159566063dSJacob Faibussowitsch     PetscCall(MatSetType(*B, MATSEQAIJ));
1162205254eSKarl Rupp 
11735233d99SShri Abhyankar     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
11835233d99SShri Abhyankar     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
1192205254eSKarl Rupp 
1209566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizesFromMats(*B, A, A));
1219566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
1229566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
1239566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
124b24902e0SBarry Smith   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
1259566063dSJacob Faibussowitsch     PetscCall(MatSetType(*B, MATSEQSBAIJ));
1269566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
1272205254eSKarl Rupp 
12835233d99SShri Abhyankar     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
12935233d99SShri Abhyankar     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
1309566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
1319566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
132e32f2f54SBarry Smith   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
133d5f3da31SBarry Smith   (*B)->factortype = ftype;
13400c67f3bSHong Zhang 
1359566063dSJacob Faibussowitsch   PetscCall(PetscFree((*B)->solvertype));
1369566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
137f73b0415SBarry Smith   (*B)->canuseordering = PETSC_TRUE;
1389566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
1393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
140b24902e0SBarry Smith }
141b24902e0SBarry Smith 
142ff6a9541SJacob Faibussowitsch #if 0
143ff6a9541SJacob Faibussowitsch // currently unused
144ff6a9541SJacob Faibussowitsch static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
145d71ae5a4SJacob Faibussowitsch {
146416022c9SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
147289bc588SBarry Smith   IS                 isicol;
1485d0c19d7SBarry Smith   const PetscInt    *r, *ic;
1495d0c19d7SBarry Smith   PetscInt           i, n = A->rmap->n, *ai = a->i, *aj = a->j;
1501393bc97SHong Zhang   PetscInt          *bi, *bj, *ajtmp;
1511393bc97SHong Zhang   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
1529e046878SBarry Smith   PetscReal          f;
1534875ba38SHong Zhang   PetscInt           nlnk, *lnk, k, **bi_ptr;
1540298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1551393bc97SHong Zhang   PetscBT            lnkbt;
156ece542f9SHong Zhang   PetscBool          missing;
157289bc588SBarry Smith 
1583a40ed3dSBarry Smith   PetscFunctionBegin;
15908401ef6SPierre Jolivet   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
1609566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
16128b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
162ece542f9SHong Zhang 
1639566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1649566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
1659566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
166289bc588SBarry Smith 
167289bc588SBarry Smith   /* get new row pointers */
1689566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bi));
1691393bc97SHong Zhang   bi[0] = 0;
1701393bc97SHong Zhang 
1711393bc97SHong Zhang   /* bdiag is location of diagonal in factor */
1729566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));
1731393bc97SHong Zhang   bdiag[0] = 0;
1741393bc97SHong Zhang 
1751393bc97SHong Zhang   /* linked list for storing column indices of the active row */
1761393bc97SHong Zhang   nlnk = n + 1;
1779566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
1781393bc97SHong Zhang 
1799566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
1801393bc97SHong Zhang 
1811393bc97SHong Zhang   /* initial FreeSpace size is f*(ai[n]+1) */
182d3d32019SBarry Smith   f = info->fill;
183aa91b3e7SRichard Tran Mills   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
1849566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1851393bc97SHong Zhang   current_space = free_space;
186289bc588SBarry Smith 
187289bc588SBarry Smith   for (i = 0; i < n; i++) {
1881393bc97SHong Zhang     /* copy previous fill into linked list */
189289bc588SBarry Smith     nzi   = 0;
1901393bc97SHong Zhang     nnz   = ai[r[i] + 1] - ai[r[i]];
1911393bc97SHong Zhang     ajtmp = aj + ai[r[i]];
1929566063dSJacob Faibussowitsch     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
1931393bc97SHong Zhang     nzi += nlnk;
1941393bc97SHong Zhang 
1951393bc97SHong Zhang     /* add pivot rows into linked list */
1961393bc97SHong Zhang     row = lnk[n];
1971393bc97SHong Zhang     while (row < i) {
198d1170560SHong Zhang       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
199d1170560SHong Zhang       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
2009566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
2011393bc97SHong Zhang       nzi += nlnk;
2021393bc97SHong Zhang       row = lnk[row];
203289bc588SBarry Smith     }
2041393bc97SHong Zhang     bi[i + 1] = bi[i] + nzi;
2051393bc97SHong Zhang     im[i]     = nzi;
2061393bc97SHong Zhang 
2071393bc97SHong Zhang     /* mark bdiag */
2081393bc97SHong Zhang     nzbd = 0;
2091393bc97SHong Zhang     nnz  = nzi;
2101393bc97SHong Zhang     k    = lnk[n];
2111393bc97SHong Zhang     while (nnz-- && k < i) {
2121393bc97SHong Zhang       nzbd++;
2131393bc97SHong Zhang       k = lnk[k];
2141393bc97SHong Zhang     }
2151393bc97SHong Zhang     bdiag[i] = bi[i] + nzbd;
2161393bc97SHong Zhang 
2171393bc97SHong Zhang     /* if free space is not available, make more free space */
2181393bc97SHong Zhang     if (current_space->local_remaining < nzi) {
219f91af8c7SBarry Smith       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
2209566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
2211393bc97SHong Zhang       reallocs++;
2221393bc97SHong Zhang     }
2231393bc97SHong Zhang 
2241393bc97SHong Zhang     /* copy data into free space, then initialize lnk */
2259566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
2262205254eSKarl Rupp 
2271393bc97SHong Zhang     bi_ptr[i] = current_space->array;
2281393bc97SHong Zhang     current_space->array += nzi;
2291393bc97SHong Zhang     current_space->local_used += nzi;
2301393bc97SHong Zhang     current_space->local_remaining -= nzi;
231289bc588SBarry Smith   }
2326cf91177SBarry Smith   #if defined(PETSC_USE_INFO)
233464e8d44SSatish Balay   if (ai[n] != 0) {
2341393bc97SHong Zhang     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2359566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
2369566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2379566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
2389566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "for best performance.\n"));
23905bf559cSSatish Balay   } else {
2409566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
24105bf559cSSatish Balay   }
24263ba0a88SBarry Smith   #endif
2432fb3ed76SBarry Smith 
2449566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
2459566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
2461987afe7SBarry Smith 
2471393bc97SHong Zhang   /* destroy list of free space and other temporary array(s) */
2489566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
2499566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
2509566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
2519566063dSJacob Faibussowitsch   PetscCall(PetscFree2(bi_ptr, im));
252289bc588SBarry Smith 
253289bc588SBarry Smith   /* put together the new matrix */
2549566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
255719d5645SBarry Smith   b = (Mat_SeqAIJ *)(B)->data;
2562205254eSKarl Rupp 
257e6b907acSBarry Smith   b->free_a       = PETSC_TRUE;
258e6b907acSBarry Smith   b->free_ij      = PETSC_TRUE;
2597c922b88SBarry Smith   b->singlemalloc = PETSC_FALSE;
2602205254eSKarl Rupp 
2619566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[n] + 1, &b->a));
2621393bc97SHong Zhang   b->j    = bj;
2631393bc97SHong Zhang   b->i    = bi;
2641393bc97SHong Zhang   b->diag = bdiag;
265f4259b30SLisandro Dalcin   b->ilen = NULL;
266f4259b30SLisandro Dalcin   b->imax = NULL;
267416022c9SBarry Smith   b->row  = isrow;
268416022c9SBarry Smith   b->col  = iscol;
2699566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
2709566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
27182bf6240SBarry Smith   b->icol = isicol;
2729566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
2731393bc97SHong Zhang 
2741393bc97SHong Zhang   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
2751393bc97SHong Zhang   b->maxnz = b->nz = bi[n];
2767fd98bd6SLois Curfman McInnes 
277d5f3da31SBarry Smith   (B)->factortype            = MAT_FACTOR_LU;
278719d5645SBarry Smith   (B)->info.factor_mallocs   = reallocs;
279719d5645SBarry Smith   (B)->info.fill_ratio_given = f;
280703deb49SSatish Balay 
2819f7d0b68SHong Zhang   if (ai[n]) {
282719d5645SBarry Smith     (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
283eea2913aSSatish Balay   } else {
284719d5645SBarry Smith     (B)->info.fill_ratio_needed = 0.0;
285eea2913aSSatish Balay   }
286ad04f41aSHong Zhang   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
287ad540459SPierre Jolivet   if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
289289bc588SBarry Smith }
290ff6a9541SJacob Faibussowitsch #endif
2911393bc97SHong Zhang 
292d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
293d71ae5a4SJacob Faibussowitsch {
294ce3d78c0SShri Abhyankar   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
295ce3d78c0SShri Abhyankar   IS                 isicol;
2968758e1faSBarry Smith   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
2978758e1faSBarry Smith   PetscInt           i, n = A->rmap->n;
2988758e1faSBarry Smith   PetscInt          *bi, *bj;
299ce3d78c0SShri Abhyankar   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
300ce3d78c0SShri Abhyankar   PetscReal          f;
301ce3d78c0SShri Abhyankar   PetscInt           nlnk, *lnk, k, **bi_ptr;
3020298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
303ce3d78c0SShri Abhyankar   PetscBT            lnkbt;
304ece542f9SHong Zhang   PetscBool          missing;
305ce3d78c0SShri Abhyankar 
306ce3d78c0SShri Abhyankar   PetscFunctionBegin;
30708401ef6SPierre Jolivet   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
3089566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
30928b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
310ece542f9SHong Zhang 
3119566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
3129566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
3139566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
314ce3d78c0SShri Abhyankar 
3151bfa06eaSJed Brown   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
3169566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bi));
3179566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));
318a6df321fSShri Abhyankar   bi[0] = bdiag[0] = 0;
3199b48462bSShri Abhyankar 
3209b48462bSShri Abhyankar   /* linked list for storing column indices of the active row */
3219b48462bSShri Abhyankar   nlnk = n + 1;
3229566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
3239b48462bSShri Abhyankar 
3249566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
3259b48462bSShri Abhyankar 
3269b48462bSShri Abhyankar   /* initial FreeSpace size is f*(ai[n]+1) */
3279b48462bSShri Abhyankar   f = info->fill;
328aa91b3e7SRichard Tran Mills   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
3299566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
3309b48462bSShri Abhyankar   current_space = free_space;
3319b48462bSShri Abhyankar 
3329b48462bSShri Abhyankar   for (i = 0; i < n; i++) {
3339b48462bSShri Abhyankar     /* copy previous fill into linked list */
3349b48462bSShri Abhyankar     nzi   = 0;
3359b48462bSShri Abhyankar     nnz   = ai[r[i] + 1] - ai[r[i]];
3369b48462bSShri Abhyankar     ajtmp = aj + ai[r[i]];
3379566063dSJacob Faibussowitsch     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
3389b48462bSShri Abhyankar     nzi += nlnk;
3399b48462bSShri Abhyankar 
3409b48462bSShri Abhyankar     /* add pivot rows into linked list */
3419b48462bSShri Abhyankar     row = lnk[n];
3429b48462bSShri Abhyankar     while (row < i) {
3439b48462bSShri Abhyankar       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
3449b48462bSShri Abhyankar       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
3459566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
3469b48462bSShri Abhyankar       nzi += nlnk;
3479b48462bSShri Abhyankar       row = lnk[row];
3489b48462bSShri Abhyankar     }
3499b48462bSShri Abhyankar     bi[i + 1] = bi[i] + nzi;
3509b48462bSShri Abhyankar     im[i]     = nzi;
3519b48462bSShri Abhyankar 
3529b48462bSShri Abhyankar     /* mark bdiag */
3539b48462bSShri Abhyankar     nzbd = 0;
3549b48462bSShri Abhyankar     nnz  = nzi;
3559b48462bSShri Abhyankar     k    = lnk[n];
3569b48462bSShri Abhyankar     while (nnz-- && k < i) {
3579b48462bSShri Abhyankar       nzbd++;
3589b48462bSShri Abhyankar       k = lnk[k];
3599b48462bSShri Abhyankar     }
3609b48462bSShri Abhyankar     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
3619b48462bSShri Abhyankar 
3629b48462bSShri Abhyankar     /* if free space is not available, make more free space */
3639b48462bSShri Abhyankar     if (current_space->local_remaining < nzi) {
3642f18eb33SBarry Smith       /* estimated additional space needed */
3651df4278eSBarry Smith       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
3669566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
3679b48462bSShri Abhyankar       reallocs++;
3689b48462bSShri Abhyankar     }
3699b48462bSShri Abhyankar 
3709b48462bSShri Abhyankar     /* copy data into free space, then initialize lnk */
3719566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
3722205254eSKarl Rupp 
3739b48462bSShri Abhyankar     bi_ptr[i] = current_space->array;
3749b48462bSShri Abhyankar     current_space->array += nzi;
3759b48462bSShri Abhyankar     current_space->local_used += nzi;
3769b48462bSShri Abhyankar     current_space->local_remaining -= nzi;
3779b48462bSShri Abhyankar   }
3789b48462bSShri Abhyankar 
3799566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
3809566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
3819b48462bSShri Abhyankar 
3829263d837SHong Zhang   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
3839566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
3849566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
3859566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
3869566063dSJacob Faibussowitsch   PetscCall(PetscFree2(bi_ptr, im));
3879b48462bSShri Abhyankar 
3889b48462bSShri Abhyankar   /* put together the new matrix */
3899566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3909b48462bSShri Abhyankar   b = (Mat_SeqAIJ *)(B)->data;
3912205254eSKarl Rupp 
3929b48462bSShri Abhyankar   b->free_a       = PETSC_TRUE;
3939b48462bSShri Abhyankar   b->free_ij      = PETSC_TRUE;
3949b48462bSShri Abhyankar   b->singlemalloc = PETSC_FALSE;
3952205254eSKarl Rupp 
3969566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
3972205254eSKarl Rupp 
3989b48462bSShri Abhyankar   b->j    = bj;
3999b48462bSShri Abhyankar   b->i    = bi;
4009b48462bSShri Abhyankar   b->diag = bdiag;
401f4259b30SLisandro Dalcin   b->ilen = NULL;
402f4259b30SLisandro Dalcin   b->imax = NULL;
4039b48462bSShri Abhyankar   b->row  = isrow;
4049b48462bSShri Abhyankar   b->col  = iscol;
4059566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
4069566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
4079b48462bSShri Abhyankar   b->icol = isicol;
4089566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
4099b48462bSShri Abhyankar 
4109b48462bSShri Abhyankar   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
4119b48462bSShri Abhyankar   b->maxnz = b->nz = bdiag[0] + 1;
4122205254eSKarl Rupp 
413d5f3da31SBarry Smith   B->factortype            = MAT_FACTOR_LU;
414f284f12aSHong Zhang   B->info.factor_mallocs   = reallocs;
415f284f12aSHong Zhang   B->info.fill_ratio_given = f;
4169b48462bSShri Abhyankar 
4179b48462bSShri Abhyankar   if (ai[n]) {
418f284f12aSHong Zhang     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
4199b48462bSShri Abhyankar   } else {
420f284f12aSHong Zhang     B->info.fill_ratio_needed = 0.0;
4219b48462bSShri Abhyankar   }
4229263d837SHong Zhang #if defined(PETSC_USE_INFO)
4239263d837SHong Zhang   if (ai[n] != 0) {
4249263d837SHong Zhang     PetscReal af = B->info.fill_ratio_needed;
4259566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
4269566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
4279566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
4289566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "for best performance.\n"));
4299263d837SHong Zhang   } else {
4309566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
4319263d837SHong Zhang   }
4329263d837SHong Zhang #endif
43335233d99SShri Abhyankar   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
434ad540459SPierre Jolivet   if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
4359566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
4363ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4379b48462bSShri Abhyankar }
4389b48462bSShri Abhyankar 
4395b5bc046SBarry Smith /*
4405b5bc046SBarry Smith     Trouble in factorization, should we dump the original matrix?
4415b5bc046SBarry Smith */
442d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorDumpMatrix(Mat A)
443d71ae5a4SJacob Faibussowitsch {
444ace3abfcSBarry Smith   PetscBool flg = PETSC_FALSE;
4455b5bc046SBarry Smith 
4465b5bc046SBarry Smith   PetscFunctionBegin;
4479566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
4485b5bc046SBarry Smith   if (flg) {
4495b5bc046SBarry Smith     PetscViewer viewer;
4505b5bc046SBarry Smith     char        filename[PETSC_MAX_PATH_LEN];
4515b5bc046SBarry Smith 
4529566063dSJacob Faibussowitsch     PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
4539566063dSJacob Faibussowitsch     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
4549566063dSJacob Faibussowitsch     PetscCall(MatView(A, viewer));
4559566063dSJacob Faibussowitsch     PetscCall(PetscViewerDestroy(&viewer));
4565b5bc046SBarry Smith   }
4573ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4585b5bc046SBarry Smith }
4595b5bc046SBarry Smith 
460d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
461d71ae5a4SJacob Faibussowitsch {
462c9c72f8fSHong Zhang   Mat              C = B;
463c9c72f8fSHong Zhang   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
464c9c72f8fSHong Zhang   IS               isrow = b->row, isicol = b->icol;
465c9c72f8fSHong Zhang   const PetscInt  *r, *ic, *ics;
466bbd65245SShri Abhyankar   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
467bbd65245SShri Abhyankar   PetscInt         i, j, k, nz, nzL, row, *pj;
468bbd65245SShri Abhyankar   const PetscInt  *ajtmp, *bjtmp;
469bbd65245SShri Abhyankar   MatScalar       *rtmp, *pc, multiplier, *pv;
470bbd65245SShri Abhyankar   const MatScalar *aa = a->a, *v;
471ace3abfcSBarry Smith   PetscBool        row_identity, col_identity;
472d90ac83dSHong Zhang   FactorShiftCtx   sctx;
4734f81c4b7SBarry Smith   const PetscInt  *ddiag;
474d4ad06f7SHong Zhang   PetscReal        rs;
475d4ad06f7SHong Zhang   MatScalar        d;
476d4ad06f7SHong Zhang 
477c9c72f8fSHong Zhang   PetscFunctionBegin;
478d6e5152cSHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
4799566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
480d4ad06f7SHong Zhang 
481f4db908eSBarry Smith   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
482d4ad06f7SHong Zhang     ddiag          = a->diag;
483d4ad06f7SHong Zhang     sctx.shift_top = info->zeropivot;
484d4ad06f7SHong Zhang     for (i = 0; i < n; i++) {
485d4ad06f7SHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
486d4ad06f7SHong Zhang       d  = (aa)[ddiag[i]];
487d4ad06f7SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
488d4ad06f7SHong Zhang       v  = aa + ai[i];
489d4ad06f7SHong Zhang       nz = ai[i + 1] - ai[i];
4902205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
491d4ad06f7SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
492d4ad06f7SHong Zhang     }
493d4ad06f7SHong Zhang     sctx.shift_top *= 1.1;
494d4ad06f7SHong Zhang     sctx.nshift_max = 5;
495d4ad06f7SHong Zhang     sctx.shift_lo   = 0.;
496d4ad06f7SHong Zhang     sctx.shift_hi   = 1.;
497d4ad06f7SHong Zhang   }
498d4ad06f7SHong Zhang 
4999566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
5009566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
5019566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &rtmp));
502c9c72f8fSHong Zhang   ics = ic;
503c9c72f8fSHong Zhang 
504d4ad06f7SHong Zhang   do {
50507b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
506c9c72f8fSHong Zhang     for (i = 0; i < n; i++) {
507c9c72f8fSHong Zhang       /* zero rtmp */
508c9c72f8fSHong Zhang       /* L part */
509c9c72f8fSHong Zhang       nz    = bi[i + 1] - bi[i];
510c9c72f8fSHong Zhang       bjtmp = bj + bi[i];
511c9c72f8fSHong Zhang       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
512c9c72f8fSHong Zhang 
513c9c72f8fSHong Zhang       /* U part */
51462fbd6afSShri Abhyankar       nz    = bdiag[i] - bdiag[i + 1];
51562fbd6afSShri Abhyankar       bjtmp = bj + bdiag[i + 1] + 1;
516813a1f2bSShri Abhyankar       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
517813a1f2bSShri Abhyankar 
518813a1f2bSShri Abhyankar       /* load in initial (unfactored row) */
519813a1f2bSShri Abhyankar       nz    = ai[r[i] + 1] - ai[r[i]];
520813a1f2bSShri Abhyankar       ajtmp = aj + ai[r[i]];
521813a1f2bSShri Abhyankar       v     = aa + ai[r[i]];
522ad540459SPierre Jolivet       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
523813a1f2bSShri Abhyankar       /* ZeropivotApply() */
52498a93161SHong Zhang       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
525813a1f2bSShri Abhyankar 
526813a1f2bSShri Abhyankar       /* elimination */
527813a1f2bSShri Abhyankar       bjtmp = bj + bi[i];
528813a1f2bSShri Abhyankar       row   = *bjtmp++;
529813a1f2bSShri Abhyankar       nzL   = bi[i + 1] - bi[i];
530f268cba8SShri Abhyankar       for (k = 0; k < nzL; k++) {
531813a1f2bSShri Abhyankar         pc = rtmp + row;
532813a1f2bSShri Abhyankar         if (*pc != 0.0) {
533813a1f2bSShri Abhyankar           pv         = b->a + bdiag[row];
534813a1f2bSShri Abhyankar           multiplier = *pc * (*pv);
535813a1f2bSShri Abhyankar           *pc        = multiplier;
5362205254eSKarl Rupp 
53762fbd6afSShri Abhyankar           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
53862fbd6afSShri Abhyankar           pv = b->a + bdiag[row + 1] + 1;
53962fbd6afSShri Abhyankar           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
5402205254eSKarl Rupp 
541813a1f2bSShri Abhyankar           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
5429566063dSJacob Faibussowitsch           PetscCall(PetscLogFlops(1 + 2.0 * nz));
543813a1f2bSShri Abhyankar         }
544f268cba8SShri Abhyankar         row = *bjtmp++;
545813a1f2bSShri Abhyankar       }
546813a1f2bSShri Abhyankar 
547813a1f2bSShri Abhyankar       /* finished row so stick it into b->a */
548813a1f2bSShri Abhyankar       rs = 0.0;
549813a1f2bSShri Abhyankar       /* L part */
550813a1f2bSShri Abhyankar       pv = b->a + bi[i];
551813a1f2bSShri Abhyankar       pj = b->j + bi[i];
552813a1f2bSShri Abhyankar       nz = bi[i + 1] - bi[i];
553813a1f2bSShri Abhyankar       for (j = 0; j < nz; j++) {
5549371c9d4SSatish Balay         pv[j] = rtmp[pj[j]];
5559371c9d4SSatish Balay         rs += PetscAbsScalar(pv[j]);
556813a1f2bSShri Abhyankar       }
557813a1f2bSShri Abhyankar 
558813a1f2bSShri Abhyankar       /* U part */
55962fbd6afSShri Abhyankar       pv = b->a + bdiag[i + 1] + 1;
56062fbd6afSShri Abhyankar       pj = b->j + bdiag[i + 1] + 1;
56162fbd6afSShri Abhyankar       nz = bdiag[i] - bdiag[i + 1] - 1;
562813a1f2bSShri Abhyankar       for (j = 0; j < nz; j++) {
5639371c9d4SSatish Balay         pv[j] = rtmp[pj[j]];
5649371c9d4SSatish Balay         rs += PetscAbsScalar(pv[j]);
565813a1f2bSShri Abhyankar       }
566813a1f2bSShri Abhyankar 
567813a1f2bSShri Abhyankar       sctx.rs = rs;
568813a1f2bSShri Abhyankar       sctx.pv = rtmp[i];
5699566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
57007b50cabSHong Zhang       if (sctx.newshift) break; /* break for-loop */
57107b50cabSHong Zhang       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
572813a1f2bSShri Abhyankar 
573a5b23f4aSJose E. Roman       /* Mark diagonal and invert diagonal for simpler triangular solves */
574813a1f2bSShri Abhyankar       pv  = b->a + bdiag[i];
575813a1f2bSShri Abhyankar       *pv = 1.0 / rtmp[i];
576813a1f2bSShri Abhyankar 
577813a1f2bSShri Abhyankar     } /* endof for (i=0; i<n; i++) { */
578813a1f2bSShri Abhyankar 
5798ff23777SHong Zhang     /* MatPivotRefine() */
58007b50cabSHong Zhang     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
581813a1f2bSShri Abhyankar       /*
582813a1f2bSShri Abhyankar        * if no shift in this attempt & shifting & started shifting & can refine,
583813a1f2bSShri Abhyankar        * then try lower shift
584813a1f2bSShri Abhyankar        */
585813a1f2bSShri Abhyankar       sctx.shift_hi       = sctx.shift_fraction;
586813a1f2bSShri Abhyankar       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
587813a1f2bSShri Abhyankar       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
58807b50cabSHong Zhang       sctx.newshift       = PETSC_TRUE;
589813a1f2bSShri Abhyankar       sctx.nshift++;
590813a1f2bSShri Abhyankar     }
59107b50cabSHong Zhang   } while (sctx.newshift);
592813a1f2bSShri Abhyankar 
5939566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
5949566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
5959566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
596d3ac4fa3SBarry Smith 
5979566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
5989566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isicol, &col_identity));
599abb87a52SBarry Smith   if (b->inode.size) {
600abb87a52SBarry Smith     C->ops->solve = MatSolve_SeqAIJ_Inode;
601abb87a52SBarry Smith   } else if (row_identity && col_identity) {
60235233d99SShri Abhyankar     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
603813a1f2bSShri Abhyankar   } else {
60435233d99SShri Abhyankar     C->ops->solve = MatSolve_SeqAIJ;
605813a1f2bSShri Abhyankar   }
60635233d99SShri Abhyankar   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
60735233d99SShri Abhyankar   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
60835233d99SShri Abhyankar   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
60935233d99SShri Abhyankar   C->ops->matsolve          = MatMatSolve_SeqAIJ;
610813a1f2bSShri Abhyankar   C->assembled              = PETSC_TRUE;
611813a1f2bSShri Abhyankar   C->preallocated           = PETSC_TRUE;
6122205254eSKarl Rupp 
6139566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->cmap->n));
614813a1f2bSShri Abhyankar 
61514d2772eSHong Zhang   /* MatShiftView(A,info,&sctx) */
616813a1f2bSShri Abhyankar   if (sctx.nshift) {
617f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
6189566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
619f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
6209566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
621f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
6229566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
623813a1f2bSShri Abhyankar     }
624813a1f2bSShri Abhyankar   }
6253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
626813a1f2bSShri Abhyankar }
627813a1f2bSShri Abhyankar 
628ff6a9541SJacob Faibussowitsch static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
629ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
630ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);
631ff6a9541SJacob Faibussowitsch 
632d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
633d71ae5a4SJacob Faibussowitsch {
634719d5645SBarry Smith   Mat              C = B;
635416022c9SBarry Smith   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
63682bf6240SBarry Smith   IS               isrow = b->row, isicol = b->icol;
6375d0c19d7SBarry Smith   const PetscInt  *r, *ic, *ics;
638d278edc7SBarry Smith   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
639d278edc7SBarry Smith   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
640d278edc7SBarry Smith   const PetscInt  *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
641d278edc7SBarry Smith   MatScalar       *pv, *rtmp, *pc, multiplier, d;
642d278edc7SBarry Smith   const MatScalar *v, *aa = a->a;
64333f9893dSHong Zhang   PetscReal        rs = 0.0;
644d90ac83dSHong Zhang   FactorShiftCtx   sctx;
6454f81c4b7SBarry Smith   const PetscInt  *ddiag;
646ace3abfcSBarry Smith   PetscBool        row_identity, col_identity;
647289bc588SBarry Smith 
6483a40ed3dSBarry Smith   PetscFunctionBegin;
649504a3fadSHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
6509566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
651ada7143aSSatish Balay 
652f4db908eSBarry Smith   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
65342898933SHong Zhang     ddiag          = a->diag;
6549f7d0b68SHong Zhang     sctx.shift_top = info->zeropivot;
6556cc28720Svictorle     for (i = 0; i < n; i++) {
6569f95998fSHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
6579f7d0b68SHong Zhang       d  = (aa)[ddiag[i]];
6581a890ab1SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
6599f7d0b68SHong Zhang       v  = aa + ai[i];
6609f7d0b68SHong Zhang       nz = ai[i + 1] - ai[i];
6612205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
6621a890ab1SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
6636cc28720Svictorle     }
664118f5789SBarry Smith     sctx.shift_top *= 1.1;
665d2276718SHong Zhang     sctx.nshift_max = 5;
666d2276718SHong Zhang     sctx.shift_lo   = 0.;
667d2276718SHong Zhang     sctx.shift_hi   = 1.;
668a0a356efSHong Zhang   }
669a0a356efSHong Zhang 
6709566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
6719566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
6729566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &rtmp));
673504a3fadSHong Zhang   ics = ic;
674504a3fadSHong Zhang 
675085256b3SBarry Smith   do {
67607b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
677289bc588SBarry Smith     for (i = 0; i < n; i++) {
678b3bf805bSHong Zhang       nz    = bi[i + 1] - bi[i];
679b3bf805bSHong Zhang       bjtmp = bj + bi[i];
6809f7d0b68SHong Zhang       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
681289bc588SBarry Smith 
682289bc588SBarry Smith       /* load in initial (unfactored row) */
6839f7d0b68SHong Zhang       nz    = ai[r[i] + 1] - ai[r[i]];
6849f7d0b68SHong Zhang       ajtmp = aj + ai[r[i]];
6859f7d0b68SHong Zhang       v     = aa + ai[r[i]];
686ad540459SPierre Jolivet       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
687d2276718SHong Zhang       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
688289bc588SBarry Smith 
689b3bf805bSHong Zhang       row = *bjtmp++;
690f2582111SSatish Balay       while (row < i) {
6918c37ef55SBarry Smith         pc = rtmp + row;
6928c37ef55SBarry Smith         if (*pc != 0.0) {
693010a20caSHong Zhang           pv         = b->a + diag_offset[row];
694010a20caSHong Zhang           pj         = b->j + diag_offset[row] + 1;
69535aab85fSBarry Smith           multiplier = *pc / *pv++;
6968c37ef55SBarry Smith           *pc        = multiplier;
697b3bf805bSHong Zhang           nz         = bi[row + 1] - diag_offset[row] - 1;
6989f7d0b68SHong Zhang           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
6999566063dSJacob Faibussowitsch           PetscCall(PetscLogFlops(1 + 2.0 * nz));
700289bc588SBarry Smith         }
701b3bf805bSHong Zhang         row = *bjtmp++;
702289bc588SBarry Smith       }
703416022c9SBarry Smith       /* finished row so stick it into b->a */
704b3bf805bSHong Zhang       pv   = b->a + bi[i];
705b3bf805bSHong Zhang       pj   = b->j + bi[i];
706b3bf805bSHong Zhang       nz   = bi[i + 1] - bi[i];
707b3bf805bSHong Zhang       diag = diag_offset[i] - bi[i];
708e57ff13aSBarry Smith       rs   = 0.0;
709d3d32019SBarry Smith       for (j = 0; j < nz; j++) {
7109f7d0b68SHong Zhang         pv[j] = rtmp[pj[j]];
7119f7d0b68SHong Zhang         rs += PetscAbsScalar(pv[j]);
712d3d32019SBarry Smith       }
713e57ff13aSBarry Smith       rs -= PetscAbsScalar(pv[diag]);
714d2276718SHong Zhang 
715d2276718SHong Zhang       sctx.rs = rs;
716d2276718SHong Zhang       sctx.pv = pv[diag];
7179566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
71807b50cabSHong Zhang       if (sctx.newshift) break;
719504a3fadSHong Zhang       pv[diag] = sctx.pv;
72035aab85fSBarry Smith     }
721d2276718SHong Zhang 
72207b50cabSHong Zhang     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
7236cc28720Svictorle       /*
7246c037d1bSvictorle        * if no shift in this attempt & shifting & started shifting & can refine,
7256cc28720Svictorle        * then try lower shift
7266cc28720Svictorle        */
7270481f469SBarry Smith       sctx.shift_hi       = sctx.shift_fraction;
7280481f469SBarry Smith       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
7290481f469SBarry Smith       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
73007b50cabSHong Zhang       sctx.newshift       = PETSC_TRUE;
731d2276718SHong Zhang       sctx.nshift++;
7326cc28720Svictorle     }
73307b50cabSHong Zhang   } while (sctx.newshift);
7340f11f9aeSSatish Balay 
735a5b23f4aSJose E. Roman   /* invert diagonal entries for simpler triangular solves */
736ad540459SPierre Jolivet   for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]];
7379566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
7389566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
7399566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
740d3ac4fa3SBarry Smith 
7419566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
7429566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isicol, &col_identity));
7438d8c7c43SBarry Smith   if (row_identity && col_identity) {
744ad04f41aSHong Zhang     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
7458d8c7c43SBarry Smith   } else {
746ad04f41aSHong Zhang     C->ops->solve = MatSolve_SeqAIJ_inplace;
747db4efbfdSBarry Smith   }
748ad04f41aSHong Zhang   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
749ad04f41aSHong Zhang   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
750ad04f41aSHong Zhang   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
751ad04f41aSHong Zhang   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
7522205254eSKarl Rupp 
753c456f294SBarry Smith   C->assembled    = PETSC_TRUE;
7545c9eb25fSBarry Smith   C->preallocated = PETSC_TRUE;
7552205254eSKarl Rupp 
7569566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->cmap->n));
757d2276718SHong Zhang   if (sctx.nshift) {
758f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
7599566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
760f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
7619566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
7626cc28720Svictorle     }
7630697b6caSHong Zhang   }
764d3ac4fa3SBarry Smith   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
765d3ac4fa3SBarry Smith   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
7662205254eSKarl Rupp 
7679566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCheckInode(C));
7683ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
769289bc588SBarry Smith }
7700889b5dcSvictorle 
771ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);
772ff6a9541SJacob Faibussowitsch 
773137fb511SHong Zhang /*
774137fb511SHong Zhang    This routine implements inplace ILU(0) with row or/and column permutations.
775137fb511SHong Zhang    Input:
776137fb511SHong Zhang      A - original matrix
777137fb511SHong Zhang    Output;
778137fb511SHong Zhang      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
779137fb511SHong Zhang          a->j (col index) is permuted by the inverse of colperm, then sorted
780137fb511SHong Zhang          a->a reordered accordingly with a->j
781137fb511SHong Zhang          a->diag (ptr to diagonal elements) is updated.
782137fb511SHong Zhang */
783d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
784d71ae5a4SJacob Faibussowitsch {
785b051339dSHong Zhang   Mat_SeqAIJ      *a     = (Mat_SeqAIJ *)A->data;
786b051339dSHong Zhang   IS               isrow = a->row, isicol = a->icol;
7875d0c19d7SBarry Smith   const PetscInt  *r, *ic, *ics;
7885d0c19d7SBarry Smith   PetscInt         i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
7895d0c19d7SBarry Smith   PetscInt        *ajtmp, nz, row;
790b051339dSHong Zhang   PetscInt        *diag = a->diag, nbdiag, *pj;
791a77337e4SBarry Smith   PetscScalar     *rtmp, *pc, multiplier, d;
792504a3fadSHong Zhang   MatScalar       *pv, *v;
793137fb511SHong Zhang   PetscReal        rs;
794d90ac83dSHong Zhang   FactorShiftCtx   sctx;
795504a3fadSHong Zhang   const MatScalar *aa = a->a, *vtmp;
796137fb511SHong Zhang 
797137fb511SHong Zhang   PetscFunctionBegin;
79808401ef6SPierre Jolivet   PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
799504a3fadSHong Zhang 
800504a3fadSHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
8019566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
802504a3fadSHong Zhang 
803504a3fadSHong Zhang   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
804504a3fadSHong Zhang     const PetscInt *ddiag = a->diag;
805504a3fadSHong Zhang     sctx.shift_top        = info->zeropivot;
806504a3fadSHong Zhang     for (i = 0; i < n; i++) {
807504a3fadSHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
808504a3fadSHong Zhang       d    = (aa)[ddiag[i]];
809504a3fadSHong Zhang       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
810504a3fadSHong Zhang       vtmp = aa + ai[i];
811504a3fadSHong Zhang       nz   = ai[i + 1] - ai[i];
8122205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
813504a3fadSHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
814504a3fadSHong Zhang     }
815504a3fadSHong Zhang     sctx.shift_top *= 1.1;
816504a3fadSHong Zhang     sctx.nshift_max = 5;
817504a3fadSHong Zhang     sctx.shift_lo   = 0.;
818504a3fadSHong Zhang     sctx.shift_hi   = 1.;
819504a3fadSHong Zhang   }
820504a3fadSHong Zhang 
8219566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
8229566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
8239566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &rtmp));
8249566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(rtmp, n + 1));
825b051339dSHong Zhang   ics = ic;
826137fb511SHong Zhang 
827504a3fadSHong Zhang #if defined(MV)
82875567043SBarry Smith   sctx.shift_top      = 0.;
829e60cf9a0SBarry Smith   sctx.nshift_max     = 0;
83075567043SBarry Smith   sctx.shift_lo       = 0.;
83175567043SBarry Smith   sctx.shift_hi       = 0.;
83275567043SBarry Smith   sctx.shift_fraction = 0.;
833137fb511SHong Zhang 
834f4db908eSBarry Smith   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
83575567043SBarry Smith     sctx.shift_top = 0.;
836137fb511SHong Zhang     for (i = 0; i < n; i++) {
837137fb511SHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
838b051339dSHong Zhang       d  = (a->a)[diag[i]];
839137fb511SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
840b051339dSHong Zhang       v  = a->a + ai[i];
841b051339dSHong Zhang       nz = ai[i + 1] - ai[i];
8422205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
843137fb511SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
844137fb511SHong Zhang     }
845137fb511SHong Zhang     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
846137fb511SHong Zhang     sctx.shift_top *= 1.1;
847137fb511SHong Zhang     sctx.nshift_max = 5;
848137fb511SHong Zhang     sctx.shift_lo   = 0.;
849137fb511SHong Zhang     sctx.shift_hi   = 1.;
850137fb511SHong Zhang   }
851137fb511SHong Zhang 
85275567043SBarry Smith   sctx.shift_amount = 0.;
853137fb511SHong Zhang   sctx.nshift       = 0;
854504a3fadSHong Zhang #endif
855504a3fadSHong Zhang 
856137fb511SHong Zhang   do {
85707b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
858137fb511SHong Zhang     for (i = 0; i < n; i++) {
859b051339dSHong Zhang       /* load in initial unfactored row */
860b051339dSHong Zhang       nz    = ai[r[i] + 1] - ai[r[i]];
861b051339dSHong Zhang       ajtmp = aj + ai[r[i]];
862b051339dSHong Zhang       v     = a->a + ai[r[i]];
863137fb511SHong Zhang       /* sort permuted ajtmp and values v accordingly */
864b051339dSHong Zhang       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
8659566063dSJacob Faibussowitsch       PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
866137fb511SHong Zhang 
867b051339dSHong Zhang       diag[r[i]] = ai[r[i]];
868137fb511SHong Zhang       for (j = 0; j < nz; j++) {
869137fb511SHong Zhang         rtmp[ajtmp[j]] = v[j];
870b051339dSHong Zhang         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
871137fb511SHong Zhang       }
872137fb511SHong Zhang       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
873137fb511SHong Zhang 
874b051339dSHong Zhang       row = *ajtmp++;
875137fb511SHong Zhang       while (row < i) {
876137fb511SHong Zhang         pc = rtmp + row;
877137fb511SHong Zhang         if (*pc != 0.0) {
878b051339dSHong Zhang           pv = a->a + diag[r[row]];
879b051339dSHong Zhang           pj = aj + diag[r[row]] + 1;
880137fb511SHong Zhang 
881137fb511SHong Zhang           multiplier = *pc / *pv++;
882137fb511SHong Zhang           *pc        = multiplier;
883b051339dSHong Zhang           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
884b051339dSHong Zhang           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
8859566063dSJacob Faibussowitsch           PetscCall(PetscLogFlops(1 + 2.0 * nz));
886137fb511SHong Zhang         }
887b051339dSHong Zhang         row = *ajtmp++;
888137fb511SHong Zhang       }
889b051339dSHong Zhang       /* finished row so overwrite it onto a->a */
890b051339dSHong Zhang       pv     = a->a + ai[r[i]];
891b051339dSHong Zhang       pj     = aj + ai[r[i]];
892b051339dSHong Zhang       nz     = ai[r[i] + 1] - ai[r[i]];
893b051339dSHong Zhang       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
894137fb511SHong Zhang 
895137fb511SHong Zhang       rs = 0.0;
896137fb511SHong Zhang       for (j = 0; j < nz; j++) {
897b051339dSHong Zhang         pv[j] = rtmp[pj[j]];
898b051339dSHong Zhang         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
899137fb511SHong Zhang       }
900137fb511SHong Zhang 
901137fb511SHong Zhang       sctx.rs = rs;
902b051339dSHong Zhang       sctx.pv = pv[nbdiag];
9039566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
90407b50cabSHong Zhang       if (sctx.newshift) break;
905504a3fadSHong Zhang       pv[nbdiag] = sctx.pv;
906137fb511SHong Zhang     }
907137fb511SHong Zhang 
90807b50cabSHong Zhang     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
909137fb511SHong Zhang       /*
910137fb511SHong Zhang        * if no shift in this attempt & shifting & started shifting & can refine,
911137fb511SHong Zhang        * then try lower shift
912137fb511SHong Zhang        */
9130481f469SBarry Smith       sctx.shift_hi       = sctx.shift_fraction;
9140481f469SBarry Smith       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
9150481f469SBarry Smith       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
91607b50cabSHong Zhang       sctx.newshift       = PETSC_TRUE;
917137fb511SHong Zhang       sctx.nshift++;
918137fb511SHong Zhang     }
91907b50cabSHong Zhang   } while (sctx.newshift);
920137fb511SHong Zhang 
921a5b23f4aSJose E. Roman   /* invert diagonal entries for simpler triangular solves */
922ad540459SPierre Jolivet   for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]];
923137fb511SHong Zhang 
9249566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
9259566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
9269566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
9272205254eSKarl Rupp 
928b051339dSHong Zhang   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
929ad04f41aSHong Zhang   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
930ad04f41aSHong Zhang   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
931ad04f41aSHong Zhang   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
9322205254eSKarl Rupp 
933b051339dSHong Zhang   A->assembled    = PETSC_TRUE;
9345c9eb25fSBarry Smith   A->preallocated = PETSC_TRUE;
9352205254eSKarl Rupp 
9369566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(A->cmap->n));
937137fb511SHong Zhang   if (sctx.nshift) {
938f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
9399566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
940f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
9419566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
942137fb511SHong Zhang     }
943137fb511SHong Zhang   }
9443ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
945137fb511SHong Zhang }
946137fb511SHong Zhang 
947d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
948d71ae5a4SJacob Faibussowitsch {
949416022c9SBarry Smith   Mat C;
950416022c9SBarry Smith 
9513a40ed3dSBarry Smith   PetscFunctionBegin;
9529566063dSJacob Faibussowitsch   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
9539566063dSJacob Faibussowitsch   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
9549566063dSJacob Faibussowitsch   PetscCall(MatLUFactorNumeric(C, A, info));
9552205254eSKarl Rupp 
956db4efbfdSBarry Smith   A->ops->solve          = C->ops->solve;
957db4efbfdSBarry Smith   A->ops->solvetranspose = C->ops->solvetranspose;
9582205254eSKarl Rupp 
9599566063dSJacob Faibussowitsch   PetscCall(MatHeaderMerge(A, &C));
9603ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
961da3a660dSBarry Smith }
9621d098868SHong Zhang 
963d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
964d71ae5a4SJacob Faibussowitsch {
965416022c9SBarry Smith   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
966416022c9SBarry Smith   IS                 iscol = a->col, isrow = a->row;
9675d0c19d7SBarry Smith   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
9685d0c19d7SBarry Smith   PetscInt           nz;
9695d0c19d7SBarry Smith   const PetscInt    *rout, *cout, *r, *c;
970dd6ea824SBarry Smith   PetscScalar       *x, *tmp, *tmps, sum;
971d9fead3dSBarry Smith   const PetscScalar *b;
972dd6ea824SBarry Smith   const MatScalar   *aa = a->a, *v;
9738c37ef55SBarry Smith 
9743a40ed3dSBarry Smith   PetscFunctionBegin;
9753ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
97691bf9adeSBarry Smith 
9779566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
9789566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
979416022c9SBarry Smith   tmp = a->solve_work;
9808c37ef55SBarry Smith 
9819371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
9829371c9d4SSatish Balay   r = rout;
9839371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
9849371c9d4SSatish Balay   c = cout + (n - 1);
9858c37ef55SBarry Smith 
9868c37ef55SBarry Smith   /* forward solve the lower triangular */
9878c37ef55SBarry Smith   tmp[0] = b[*r++];
988010a20caSHong Zhang   tmps   = tmp;
9898c37ef55SBarry Smith   for (i = 1; i < n; i++) {
990010a20caSHong Zhang     v   = aa + ai[i];
991010a20caSHong Zhang     vi  = aj + ai[i];
99253e7425aSBarry Smith     nz  = a->diag[i] - ai[i];
99353e7425aSBarry Smith     sum = b[*r++];
994003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
9958c37ef55SBarry Smith     tmp[i] = sum;
9968c37ef55SBarry Smith   }
9978c37ef55SBarry Smith 
9988c37ef55SBarry Smith   /* backward solve the upper triangular */
9998c37ef55SBarry Smith   for (i = n - 1; i >= 0; i--) {
1000010a20caSHong Zhang     v   = aa + a->diag[i] + 1;
1001010a20caSHong Zhang     vi  = aj + a->diag[i] + 1;
1002416022c9SBarry Smith     nz  = ai[i + 1] - a->diag[i] - 1;
10038c37ef55SBarry Smith     sum = tmp[i];
1004003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1005010a20caSHong Zhang     x[*c--] = tmp[i] = sum * aa[a->diag[i]];
10068c37ef55SBarry Smith   }
10078c37ef55SBarry Smith 
10089566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
10099566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
10109566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
10119566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
10129566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
10133ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
10148c37ef55SBarry Smith }
1015026e39d0SSatish Balay 
1016ff6a9541SJacob Faibussowitsch static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
1017d71ae5a4SJacob Faibussowitsch {
10182bebee5dSHong Zhang   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
10192bebee5dSHong Zhang   IS                 iscol = a->col, isrow = a->row;
10205d0c19d7SBarry Smith   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1021910cf402Sprj-   PetscInt           nz, neq, ldb, ldx;
10225d0c19d7SBarry Smith   const PetscInt    *rout, *cout, *r, *c;
1023910cf402Sprj-   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
1024910cf402Sprj-   const PetscScalar *b, *aa  = a->a, *v;
1025910cf402Sprj-   PetscBool          isdense;
10262bebee5dSHong Zhang 
10272bebee5dSHong Zhang   PetscFunctionBegin;
10283ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
10299566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
103028b400f6SJacob Faibussowitsch   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1031f9fb9879SHong Zhang   if (X != B) {
10329566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
103328b400f6SJacob Faibussowitsch     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1034f9fb9879SHong Zhang   }
10359566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(B, &b));
10369566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(B, &ldb));
10379566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(X, &x));
10389566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(X, &ldx));
10399371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
10409371c9d4SSatish Balay   r = rout;
10419371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
10429371c9d4SSatish Balay   c = cout;
1043a36b22ccSSatish Balay   for (neq = 0; neq < B->cmap->n; neq++) {
10442bebee5dSHong Zhang     /* forward solve the lower triangular */
10452bebee5dSHong Zhang     tmp[0] = b[r[0]];
10462bebee5dSHong Zhang     tmps   = tmp;
10472bebee5dSHong Zhang     for (i = 1; i < n; i++) {
10482bebee5dSHong Zhang       v   = aa + ai[i];
10492bebee5dSHong Zhang       vi  = aj + ai[i];
10502bebee5dSHong Zhang       nz  = a->diag[i] - ai[i];
10512bebee5dSHong Zhang       sum = b[r[i]];
1052003131ecSBarry Smith       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
10532bebee5dSHong Zhang       tmp[i] = sum;
10542bebee5dSHong Zhang     }
10552bebee5dSHong Zhang     /* backward solve the upper triangular */
10562bebee5dSHong Zhang     for (i = n - 1; i >= 0; i--) {
10572bebee5dSHong Zhang       v   = aa + a->diag[i] + 1;
10582bebee5dSHong Zhang       vi  = aj + a->diag[i] + 1;
10592bebee5dSHong Zhang       nz  = ai[i + 1] - a->diag[i] - 1;
10602bebee5dSHong Zhang       sum = tmp[i];
1061003131ecSBarry Smith       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
10622bebee5dSHong Zhang       x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
10632bebee5dSHong Zhang     }
1064910cf402Sprj-     b += ldb;
1065910cf402Sprj-     x += ldx;
10662bebee5dSHong Zhang   }
10679566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
10689566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
10699566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(B, &b));
10709566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(X, &x));
10719566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
10723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
10732bebee5dSHong Zhang }
10742bebee5dSHong Zhang 
1075d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
1076d71ae5a4SJacob Faibussowitsch {
10779bd0847aSShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
10789bd0847aSShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
10799bd0847aSShri Abhyankar   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1080910cf402Sprj-   PetscInt           nz, neq, ldb, ldx;
10819bd0847aSShri Abhyankar   const PetscInt    *rout, *cout, *r, *c;
1082910cf402Sprj-   PetscScalar       *x, *tmp = a->solve_work, sum;
1083910cf402Sprj-   const PetscScalar *b, *aa  = a->a, *v;
1084910cf402Sprj-   PetscBool          isdense;
10859bd0847aSShri Abhyankar 
10869bd0847aSShri Abhyankar   PetscFunctionBegin;
10873ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
10889566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
108928b400f6SJacob Faibussowitsch   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1090f9fb9879SHong Zhang   if (X != B) {
10919566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
109228b400f6SJacob Faibussowitsch     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1093f9fb9879SHong Zhang   }
10949566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(B, &b));
10959566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(B, &ldb));
10969566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(X, &x));
10979566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(X, &ldx));
10989371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
10999371c9d4SSatish Balay   r = rout;
11009371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
11019371c9d4SSatish Balay   c = cout;
11029bd0847aSShri Abhyankar   for (neq = 0; neq < B->cmap->n; neq++) {
11039bd0847aSShri Abhyankar     /* forward solve the lower triangular */
11049bd0847aSShri Abhyankar     tmp[0] = b[r[0]];
11059bd0847aSShri Abhyankar     v      = aa;
11069bd0847aSShri Abhyankar     vi     = aj;
11079bd0847aSShri Abhyankar     for (i = 1; i < n; i++) {
11089bd0847aSShri Abhyankar       nz  = ai[i + 1] - ai[i];
11099bd0847aSShri Abhyankar       sum = b[r[i]];
11109bd0847aSShri Abhyankar       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
11119bd0847aSShri Abhyankar       tmp[i] = sum;
11129371c9d4SSatish Balay       v += nz;
11139371c9d4SSatish Balay       vi += nz;
11149bd0847aSShri Abhyankar     }
11159bd0847aSShri Abhyankar     /* backward solve the upper triangular */
11169bd0847aSShri Abhyankar     for (i = n - 1; i >= 0; i--) {
11179bd0847aSShri Abhyankar       v   = aa + adiag[i + 1] + 1;
11189bd0847aSShri Abhyankar       vi  = aj + adiag[i + 1] + 1;
11199bd0847aSShri Abhyankar       nz  = adiag[i] - adiag[i + 1] - 1;
11209bd0847aSShri Abhyankar       sum = tmp[i];
11219bd0847aSShri Abhyankar       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
11229bd0847aSShri Abhyankar       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
11239bd0847aSShri Abhyankar     }
1124910cf402Sprj-     b += ldb;
1125910cf402Sprj-     x += ldx;
11269bd0847aSShri Abhyankar   }
11279566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
11289566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
11299566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(B, &b));
11309566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(X, &x));
11319566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
11323ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
11339bd0847aSShri Abhyankar }
11349bd0847aSShri Abhyankar 
1135ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
1136d71ae5a4SJacob Faibussowitsch {
1137137fb511SHong Zhang   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1138137fb511SHong Zhang   IS                 iscol = a->col, isrow = a->row;
11395d0c19d7SBarry Smith   const PetscInt    *r, *c, *rout, *cout;
11405d0c19d7SBarry Smith   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
11415d0c19d7SBarry Smith   PetscInt           nz, row;
1142fdc842d1SBarry Smith   PetscScalar       *x, *tmp, *tmps, sum;
1143fdc842d1SBarry Smith   const PetscScalar *b;
1144dd6ea824SBarry Smith   const MatScalar   *aa = a->a, *v;
1145137fb511SHong Zhang 
1146137fb511SHong Zhang   PetscFunctionBegin;
11473ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1148137fb511SHong Zhang 
11499566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
11509566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
1151137fb511SHong Zhang   tmp = a->solve_work;
1152137fb511SHong Zhang 
11539371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
11549371c9d4SSatish Balay   r = rout;
11559371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
11569371c9d4SSatish Balay   c = cout + (n - 1);
1157137fb511SHong Zhang 
1158137fb511SHong Zhang   /* forward solve the lower triangular */
1159137fb511SHong Zhang   tmp[0] = b[*r++];
1160137fb511SHong Zhang   tmps   = tmp;
1161137fb511SHong Zhang   for (row = 1; row < n; row++) {
1162137fb511SHong Zhang     i   = rout[row]; /* permuted row */
1163137fb511SHong Zhang     v   = aa + ai[i];
1164137fb511SHong Zhang     vi  = aj + ai[i];
1165137fb511SHong Zhang     nz  = a->diag[i] - ai[i];
1166137fb511SHong Zhang     sum = b[*r++];
1167003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1168137fb511SHong Zhang     tmp[row] = sum;
1169137fb511SHong Zhang   }
1170137fb511SHong Zhang 
1171137fb511SHong Zhang   /* backward solve the upper triangular */
1172137fb511SHong Zhang   for (row = n - 1; row >= 0; row--) {
1173137fb511SHong Zhang     i   = rout[row]; /* permuted row */
1174137fb511SHong Zhang     v   = aa + a->diag[i] + 1;
1175137fb511SHong Zhang     vi  = aj + a->diag[i] + 1;
1176137fb511SHong Zhang     nz  = ai[i + 1] - a->diag[i] - 1;
1177137fb511SHong Zhang     sum = tmp[row];
1178003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1179137fb511SHong Zhang     x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1180137fb511SHong Zhang   }
1181137fb511SHong Zhang 
11829566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
11839566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
11849566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
11859566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
11869566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
11873ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1188137fb511SHong Zhang }
1189137fb511SHong Zhang 
1190c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1191ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1192d71ae5a4SJacob Faibussowitsch {
1193930ae53cSBarry Smith   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1194d0f46423SBarry Smith   PetscInt           n  = A->rmap->n;
1195b377110cSBarry Smith   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag;
1196356650c2SBarry Smith   PetscScalar       *x;
1197356650c2SBarry Smith   const PetscScalar *b;
1198dd6ea824SBarry Smith   const MatScalar   *aa = a->a;
1199aa482453SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1200356650c2SBarry Smith   PetscInt         adiag_i, i, nz, ai_i;
1201b377110cSBarry Smith   const PetscInt  *vi;
1202dd6ea824SBarry Smith   const MatScalar *v;
1203dd6ea824SBarry Smith   PetscScalar      sum;
1204d85afc42SSatish Balay #endif
1205930ae53cSBarry Smith 
12063a40ed3dSBarry Smith   PetscFunctionBegin;
12073ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1208930ae53cSBarry Smith 
12099566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
12109566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
1211930ae53cSBarry Smith 
1212aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
12131c4feecaSSatish Balay   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
12141c4feecaSSatish Balay #else
1215930ae53cSBarry Smith   /* forward solve the lower triangular */
1216930ae53cSBarry Smith   x[0] = b[0];
1217930ae53cSBarry Smith   for (i = 1; i < n; i++) {
1218930ae53cSBarry Smith     ai_i = ai[i];
1219930ae53cSBarry Smith     v    = aa + ai_i;
1220930ae53cSBarry Smith     vi   = aj + ai_i;
1221930ae53cSBarry Smith     nz   = adiag[i] - ai_i;
1222930ae53cSBarry Smith     sum  = b[i];
1223003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1224930ae53cSBarry Smith     x[i] = sum;
1225930ae53cSBarry Smith   }
1226930ae53cSBarry Smith 
1227930ae53cSBarry Smith   /* backward solve the upper triangular */
1228930ae53cSBarry Smith   for (i = n - 1; i >= 0; i--) {
1229930ae53cSBarry Smith     adiag_i = adiag[i];
1230d708749eSBarry Smith     v       = aa + adiag_i + 1;
1231d708749eSBarry Smith     vi      = aj + adiag_i + 1;
1232930ae53cSBarry Smith     nz      = ai[i + 1] - adiag_i - 1;
1233930ae53cSBarry Smith     sum     = x[i];
1234003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1235930ae53cSBarry Smith     x[i] = sum * aa[adiag_i];
1236930ae53cSBarry Smith   }
12371c4feecaSSatish Balay #endif
12389566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
12399566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
12409566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
12413ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1242930ae53cSBarry Smith }
1243930ae53cSBarry Smith 
1244ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1245d71ae5a4SJacob Faibussowitsch {
1246416022c9SBarry Smith   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1247416022c9SBarry Smith   IS                 iscol = a->col, isrow = a->row;
124804fbf559SBarry Smith   PetscInt           i, n                  = A->rmap->n, j;
12495d0c19d7SBarry Smith   PetscInt           nz;
125004fbf559SBarry Smith   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
125104fbf559SBarry Smith   PetscScalar       *x, *tmp, sum;
125204fbf559SBarry Smith   const PetscScalar *b;
1253dd6ea824SBarry Smith   const MatScalar   *aa = a->a, *v;
1254da3a660dSBarry Smith 
12553a40ed3dSBarry Smith   PetscFunctionBegin;
12569566063dSJacob Faibussowitsch   if (yy != xx) PetscCall(VecCopy(yy, xx));
1257da3a660dSBarry Smith 
12589566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
12599566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
1260416022c9SBarry Smith   tmp = a->solve_work;
1261da3a660dSBarry Smith 
12629371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
12639371c9d4SSatish Balay   r = rout;
12649371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
12659371c9d4SSatish Balay   c = cout + (n - 1);
1266da3a660dSBarry Smith 
1267da3a660dSBarry Smith   /* forward solve the lower triangular */
1268da3a660dSBarry Smith   tmp[0] = b[*r++];
1269da3a660dSBarry Smith   for (i = 1; i < n; i++) {
1270010a20caSHong Zhang     v   = aa + ai[i];
1271010a20caSHong Zhang     vi  = aj + ai[i];
1272416022c9SBarry Smith     nz  = a->diag[i] - ai[i];
1273da3a660dSBarry Smith     sum = b[*r++];
127404fbf559SBarry Smith     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1275da3a660dSBarry Smith     tmp[i] = sum;
1276da3a660dSBarry Smith   }
1277da3a660dSBarry Smith 
1278da3a660dSBarry Smith   /* backward solve the upper triangular */
1279da3a660dSBarry Smith   for (i = n - 1; i >= 0; i--) {
1280010a20caSHong Zhang     v   = aa + a->diag[i] + 1;
1281010a20caSHong Zhang     vi  = aj + a->diag[i] + 1;
1282416022c9SBarry Smith     nz  = ai[i + 1] - a->diag[i] - 1;
1283da3a660dSBarry Smith     sum = tmp[i];
128404fbf559SBarry Smith     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1285010a20caSHong Zhang     tmp[i] = sum * aa[a->diag[i]];
1286da3a660dSBarry Smith     x[*c--] += tmp[i];
1287da3a660dSBarry Smith   }
1288da3a660dSBarry Smith 
12899566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
12909566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
12919566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
12929566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
12939566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
12943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1295da3a660dSBarry Smith }
129604fbf559SBarry Smith 
1297d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1298d71ae5a4SJacob Faibussowitsch {
12993c0119dfSShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
13003c0119dfSShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
13013c0119dfSShri Abhyankar   PetscInt           i, n                  = A->rmap->n, j;
13023c0119dfSShri Abhyankar   PetscInt           nz;
13033c0119dfSShri Abhyankar   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
13043c0119dfSShri Abhyankar   PetscScalar       *x, *tmp, sum;
13053c0119dfSShri Abhyankar   const PetscScalar *b;
13063c0119dfSShri Abhyankar   const MatScalar   *aa = a->a, *v;
13073c0119dfSShri Abhyankar 
13083c0119dfSShri Abhyankar   PetscFunctionBegin;
13099566063dSJacob Faibussowitsch   if (yy != xx) PetscCall(VecCopy(yy, xx));
13103c0119dfSShri Abhyankar 
13119566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
13129566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
13133c0119dfSShri Abhyankar   tmp = a->solve_work;
13143c0119dfSShri Abhyankar 
13159371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
13169371c9d4SSatish Balay   r = rout;
13179371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
13189371c9d4SSatish Balay   c = cout;
13193c0119dfSShri Abhyankar 
13203c0119dfSShri Abhyankar   /* forward solve the lower triangular */
13213c0119dfSShri Abhyankar   tmp[0] = b[r[0]];
13223c0119dfSShri Abhyankar   v      = aa;
13233c0119dfSShri Abhyankar   vi     = aj;
13243c0119dfSShri Abhyankar   for (i = 1; i < n; i++) {
13253c0119dfSShri Abhyankar     nz  = ai[i + 1] - ai[i];
13263c0119dfSShri Abhyankar     sum = b[r[i]];
13273c0119dfSShri Abhyankar     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
13283c0119dfSShri Abhyankar     tmp[i] = sum;
13292205254eSKarl Rupp     v += nz;
13302205254eSKarl Rupp     vi += nz;
13313c0119dfSShri Abhyankar   }
13323c0119dfSShri Abhyankar 
13333c0119dfSShri Abhyankar   /* backward solve the upper triangular */
13343c0119dfSShri Abhyankar   v  = aa + adiag[n - 1];
13353c0119dfSShri Abhyankar   vi = aj + adiag[n - 1];
13363c0119dfSShri Abhyankar   for (i = n - 1; i >= 0; i--) {
13373c0119dfSShri Abhyankar     nz  = adiag[i] - adiag[i + 1] - 1;
13383c0119dfSShri Abhyankar     sum = tmp[i];
13393c0119dfSShri Abhyankar     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
13403c0119dfSShri Abhyankar     tmp[i] = sum * v[nz];
13413c0119dfSShri Abhyankar     x[c[i]] += tmp[i];
13429371c9d4SSatish Balay     v += nz + 1;
13439371c9d4SSatish Balay     vi += nz + 1;
13443c0119dfSShri Abhyankar   }
13453c0119dfSShri Abhyankar 
13469566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
13479566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
13489566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
13499566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
13509566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
13513ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
13523c0119dfSShri Abhyankar }
13533c0119dfSShri Abhyankar 
1354d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1355d71ae5a4SJacob Faibussowitsch {
1356416022c9SBarry Smith   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
13572235e667SBarry Smith   IS                 iscol = a->col, isrow = a->row;
135804fbf559SBarry Smith   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
135904fbf559SBarry Smith   PetscInt           i, n = A->rmap->n, j;
136004fbf559SBarry Smith   PetscInt           nz;
136104fbf559SBarry Smith   PetscScalar       *x, *tmp, s1;
1362dd6ea824SBarry Smith   const MatScalar   *aa = a->a, *v;
136304fbf559SBarry Smith   const PetscScalar *b;
1364da3a660dSBarry Smith 
13653a40ed3dSBarry Smith   PetscFunctionBegin;
13669566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
13679566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
1368416022c9SBarry Smith   tmp = a->solve_work;
1369da3a660dSBarry Smith 
13709371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
13719371c9d4SSatish Balay   r = rout;
13729371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
13739371c9d4SSatish Balay   c = cout;
1374da3a660dSBarry Smith 
1375da3a660dSBarry Smith   /* copy the b into temp work space according to permutation */
13762235e667SBarry Smith   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1377da3a660dSBarry Smith 
1378da3a660dSBarry Smith   /* forward solve the U^T */
1379da3a660dSBarry Smith   for (i = 0; i < n; i++) {
1380010a20caSHong Zhang     v  = aa + diag[i];
1381010a20caSHong Zhang     vi = aj + diag[i] + 1;
1382f1af5d2fSBarry Smith     nz = ai[i + 1] - diag[i] - 1;
1383c38d4ed2SBarry Smith     s1 = tmp[i];
1384ef66eb69SBarry Smith     s1 *= (*v++); /* multiply by inverse of diagonal entry */
138504fbf559SBarry Smith     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1386c38d4ed2SBarry Smith     tmp[i] = s1;
1387da3a660dSBarry Smith   }
1388da3a660dSBarry Smith 
1389da3a660dSBarry Smith   /* backward solve the L^T */
1390da3a660dSBarry Smith   for (i = n - 1; i >= 0; i--) {
1391010a20caSHong Zhang     v  = aa + diag[i] - 1;
1392010a20caSHong Zhang     vi = aj + diag[i] - 1;
1393f1af5d2fSBarry Smith     nz = diag[i] - ai[i];
1394f1af5d2fSBarry Smith     s1 = tmp[i];
139504fbf559SBarry Smith     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1396da3a660dSBarry Smith   }
1397da3a660dSBarry Smith 
1398da3a660dSBarry Smith   /* copy tmp into x according to permutation */
1399591294e4SBarry Smith   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1400da3a660dSBarry Smith 
14019566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
14029566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
14039566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
14049566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
1405da3a660dSBarry Smith 
14069566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
14073ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1408da3a660dSBarry Smith }
1409da3a660dSBarry Smith 
1410d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1411d71ae5a4SJacob Faibussowitsch {
1412d1fa9404SShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1413d1fa9404SShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
1414d1fa9404SShri Abhyankar   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1415d1fa9404SShri Abhyankar   PetscInt           i, n = A->rmap->n, j;
1416d1fa9404SShri Abhyankar   PetscInt           nz;
1417d1fa9404SShri Abhyankar   PetscScalar       *x, *tmp, s1;
1418d1fa9404SShri Abhyankar   const MatScalar   *aa = a->a, *v;
1419d1fa9404SShri Abhyankar   const PetscScalar *b;
1420d1fa9404SShri Abhyankar 
1421d1fa9404SShri Abhyankar   PetscFunctionBegin;
14229566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
14239566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
1424d1fa9404SShri Abhyankar   tmp = a->solve_work;
1425d1fa9404SShri Abhyankar 
14269371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
14279371c9d4SSatish Balay   r = rout;
14289371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
14299371c9d4SSatish Balay   c = cout;
1430d1fa9404SShri Abhyankar 
1431d1fa9404SShri Abhyankar   /* copy the b into temp work space according to permutation */
1432d1fa9404SShri Abhyankar   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1433d1fa9404SShri Abhyankar 
1434d1fa9404SShri Abhyankar   /* forward solve the U^T */
1435d1fa9404SShri Abhyankar   for (i = 0; i < n; i++) {
1436d1fa9404SShri Abhyankar     v  = aa + adiag[i + 1] + 1;
1437d1fa9404SShri Abhyankar     vi = aj + adiag[i + 1] + 1;
1438d1fa9404SShri Abhyankar     nz = adiag[i] - adiag[i + 1] - 1;
1439d1fa9404SShri Abhyankar     s1 = tmp[i];
1440d1fa9404SShri Abhyankar     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1441d1fa9404SShri Abhyankar     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1442d1fa9404SShri Abhyankar     tmp[i] = s1;
1443d1fa9404SShri Abhyankar   }
1444d1fa9404SShri Abhyankar 
1445d1fa9404SShri Abhyankar   /* backward solve the L^T */
1446d1fa9404SShri Abhyankar   for (i = n - 1; i >= 0; i--) {
1447d1fa9404SShri Abhyankar     v  = aa + ai[i];
1448d1fa9404SShri Abhyankar     vi = aj + ai[i];
1449d1fa9404SShri Abhyankar     nz = ai[i + 1] - ai[i];
1450d1fa9404SShri Abhyankar     s1 = tmp[i];
1451d1fa9404SShri Abhyankar     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1452d1fa9404SShri Abhyankar   }
1453d1fa9404SShri Abhyankar 
1454d1fa9404SShri Abhyankar   /* copy tmp into x according to permutation */
1455d1fa9404SShri Abhyankar   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1456d1fa9404SShri Abhyankar 
14579566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
14589566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
14599566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
14609566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
1461d1fa9404SShri Abhyankar 
14629566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
14633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1464d1fa9404SShri Abhyankar }
1465d1fa9404SShri Abhyankar 
1466d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1467d71ae5a4SJacob Faibussowitsch {
1468416022c9SBarry Smith   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
14692235e667SBarry Smith   IS                 iscol = a->col, isrow = a->row;
147004fbf559SBarry Smith   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
147104fbf559SBarry Smith   PetscInt           i, n = A->rmap->n, j;
147204fbf559SBarry Smith   PetscInt           nz;
147304fbf559SBarry Smith   PetscScalar       *x, *tmp, s1;
1474dd6ea824SBarry Smith   const MatScalar   *aa = a->a, *v;
147504fbf559SBarry Smith   const PetscScalar *b;
14766abc6512SBarry Smith 
14773a40ed3dSBarry Smith   PetscFunctionBegin;
14789566063dSJacob Faibussowitsch   if (zz != xx) PetscCall(VecCopy(zz, xx));
14799566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
14809566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
1481416022c9SBarry Smith   tmp = a->solve_work;
14826abc6512SBarry Smith 
14839371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
14849371c9d4SSatish Balay   r = rout;
14859371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
14869371c9d4SSatish Balay   c = cout;
14876abc6512SBarry Smith 
14886abc6512SBarry Smith   /* copy the b into temp work space according to permutation */
14892235e667SBarry Smith   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
14906abc6512SBarry Smith 
14916abc6512SBarry Smith   /* forward solve the U^T */
14926abc6512SBarry Smith   for (i = 0; i < n; i++) {
1493010a20caSHong Zhang     v  = aa + diag[i];
1494010a20caSHong Zhang     vi = aj + diag[i] + 1;
1495f1af5d2fSBarry Smith     nz = ai[i + 1] - diag[i] - 1;
149604fbf559SBarry Smith     s1 = tmp[i];
149704fbf559SBarry Smith     s1 *= (*v++); /* multiply by inverse of diagonal entry */
149804fbf559SBarry Smith     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
149904fbf559SBarry Smith     tmp[i] = s1;
15006abc6512SBarry Smith   }
15016abc6512SBarry Smith 
15026abc6512SBarry Smith   /* backward solve the L^T */
15036abc6512SBarry Smith   for (i = n - 1; i >= 0; i--) {
1504010a20caSHong Zhang     v  = aa + diag[i] - 1;
1505010a20caSHong Zhang     vi = aj + diag[i] - 1;
1506f1af5d2fSBarry Smith     nz = diag[i] - ai[i];
150704fbf559SBarry Smith     s1 = tmp[i];
150804fbf559SBarry Smith     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
15096abc6512SBarry Smith   }
15106abc6512SBarry Smith 
15116abc6512SBarry Smith   /* copy tmp into x according to permutation */
15126abc6512SBarry Smith   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
15136abc6512SBarry Smith 
15149566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
15159566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
15169566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
15179566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
15186abc6512SBarry Smith 
15199566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
15203ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1521da3a660dSBarry Smith }
152204fbf559SBarry Smith 
1523d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1524d71ae5a4SJacob Faibussowitsch {
1525533e6011SShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1526533e6011SShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
1527533e6011SShri Abhyankar   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1528533e6011SShri Abhyankar   PetscInt           i, n = A->rmap->n, j;
1529533e6011SShri Abhyankar   PetscInt           nz;
1530533e6011SShri Abhyankar   PetscScalar       *x, *tmp, s1;
1531533e6011SShri Abhyankar   const MatScalar   *aa = a->a, *v;
1532533e6011SShri Abhyankar   const PetscScalar *b;
1533533e6011SShri Abhyankar 
1534533e6011SShri Abhyankar   PetscFunctionBegin;
15359566063dSJacob Faibussowitsch   if (zz != xx) PetscCall(VecCopy(zz, xx));
15369566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
15379566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
1538533e6011SShri Abhyankar   tmp = a->solve_work;
1539533e6011SShri Abhyankar 
15409371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
15419371c9d4SSatish Balay   r = rout;
15429371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
15439371c9d4SSatish Balay   c = cout;
1544533e6011SShri Abhyankar 
1545533e6011SShri Abhyankar   /* copy the b into temp work space according to permutation */
1546533e6011SShri Abhyankar   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1547533e6011SShri Abhyankar 
1548533e6011SShri Abhyankar   /* forward solve the U^T */
1549533e6011SShri Abhyankar   for (i = 0; i < n; i++) {
1550533e6011SShri Abhyankar     v  = aa + adiag[i + 1] + 1;
1551533e6011SShri Abhyankar     vi = aj + adiag[i + 1] + 1;
1552533e6011SShri Abhyankar     nz = adiag[i] - adiag[i + 1] - 1;
1553533e6011SShri Abhyankar     s1 = tmp[i];
1554533e6011SShri Abhyankar     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1555533e6011SShri Abhyankar     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1556533e6011SShri Abhyankar     tmp[i] = s1;
1557533e6011SShri Abhyankar   }
1558533e6011SShri Abhyankar 
1559533e6011SShri Abhyankar   /* backward solve the L^T */
1560533e6011SShri Abhyankar   for (i = n - 1; i >= 0; i--) {
1561533e6011SShri Abhyankar     v  = aa + ai[i];
1562533e6011SShri Abhyankar     vi = aj + ai[i];
1563533e6011SShri Abhyankar     nz = ai[i + 1] - ai[i];
1564533e6011SShri Abhyankar     s1 = tmp[i];
1565c5e3b2a3SShri Abhyankar     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1566533e6011SShri Abhyankar   }
1567533e6011SShri Abhyankar 
1568533e6011SShri Abhyankar   /* copy tmp into x according to permutation */
1569533e6011SShri Abhyankar   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1570533e6011SShri Abhyankar 
15719566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
15729566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
15739566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
15749566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
1575533e6011SShri Abhyankar 
15769566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
15773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1578533e6011SShri Abhyankar }
1579533e6011SShri Abhyankar 
15808fc3a347SHong Zhang /*
15818a76ed4fSHong Zhang    ilu() under revised new data structure.
1582813a1f2bSShri Abhyankar    Factored arrays bj and ba are stored as
1583813a1f2bSShri Abhyankar      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1584813a1f2bSShri Abhyankar 
1585813a1f2bSShri Abhyankar    bi=fact->i is an array of size n+1, in which
1586baabb860SHong Zhang      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1587baabb860SHong Zhang      bi[n]:  points to L(n-1,n-1)+1
1588baabb860SHong Zhang 
1589813a1f2bSShri Abhyankar   bdiag=fact->diag is an array of size n+1,in which
1590baabb860SHong Zhang      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1591a24f213cSHong Zhang      bdiag[n]: points to entry of U(n-1,0)-1
1592813a1f2bSShri Abhyankar 
1593813a1f2bSShri Abhyankar    U(i,:) contains bdiag[i] as its last entry, i.e.,
1594813a1f2bSShri Abhyankar     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1595813a1f2bSShri Abhyankar */
1596d71ae5a4SJacob Faibussowitsch PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1597d71ae5a4SJacob Faibussowitsch {
1598813a1f2bSShri Abhyankar   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1599bbd65245SShri Abhyankar   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1600bbd65245SShri Abhyankar   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
16011df811f5SHong Zhang   IS             isicol;
1602813a1f2bSShri Abhyankar 
1603813a1f2bSShri Abhyankar   PetscFunctionBegin;
16049566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
16059566063dSJacob Faibussowitsch   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1606813a1f2bSShri Abhyankar   b = (Mat_SeqAIJ *)(fact)->data;
1607813a1f2bSShri Abhyankar 
1608813a1f2bSShri Abhyankar   /* allocate matrix arrays for new data structure */
16099566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i));
16102205254eSKarl Rupp 
1611813a1f2bSShri Abhyankar   b->singlemalloc = PETSC_TRUE;
1612aa624791SPierre Jolivet   if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1613813a1f2bSShri Abhyankar   bdiag = b->diag;
1614813a1f2bSShri Abhyankar 
161548a46eb9SPierre Jolivet   if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1616813a1f2bSShri Abhyankar 
1617813a1f2bSShri Abhyankar   /* set bi and bj with new data structure */
1618813a1f2bSShri Abhyankar   bi = b->i;
1619813a1f2bSShri Abhyankar   bj = b->j;
1620813a1f2bSShri Abhyankar 
1621813a1f2bSShri Abhyankar   /* L part */
1622e60cf9a0SBarry Smith   bi[0] = 0;
1623813a1f2bSShri Abhyankar   for (i = 0; i < n; i++) {
1624813a1f2bSShri Abhyankar     nz        = adiag[i] - ai[i];
1625813a1f2bSShri Abhyankar     bi[i + 1] = bi[i] + nz;
1626813a1f2bSShri Abhyankar     aj        = a->j + ai[i];
1627ad540459SPierre Jolivet     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1628813a1f2bSShri Abhyankar   }
1629813a1f2bSShri Abhyankar 
1630813a1f2bSShri Abhyankar   /* U part */
163162fbd6afSShri Abhyankar   bdiag[n] = bi[n] - 1;
1632813a1f2bSShri Abhyankar   for (i = n - 1; i >= 0; i--) {
1633813a1f2bSShri Abhyankar     nz = ai[i + 1] - adiag[i] - 1;
1634813a1f2bSShri Abhyankar     aj = a->j + adiag[i] + 1;
1635ad540459SPierre Jolivet     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1636813a1f2bSShri Abhyankar     /* diag[i] */
1637bbd65245SShri Abhyankar     bj[k++]  = i;
1638a24f213cSHong Zhang     bdiag[i] = bdiag[i + 1] + nz + 1;
1639813a1f2bSShri Abhyankar   }
16401df811f5SHong Zhang 
1641d5f3da31SBarry Smith   fact->factortype             = MAT_FACTOR_ILU;
16421df811f5SHong Zhang   fact->info.factor_mallocs    = 0;
16431df811f5SHong Zhang   fact->info.fill_ratio_given  = info->fill;
16441df811f5SHong Zhang   fact->info.fill_ratio_needed = 1.0;
164535233d99SShri Abhyankar   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
16469566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
16471df811f5SHong Zhang 
16481df811f5SHong Zhang   b       = (Mat_SeqAIJ *)(fact)->data;
16491df811f5SHong Zhang   b->row  = isrow;
16501df811f5SHong Zhang   b->col  = iscol;
16511df811f5SHong Zhang   b->icol = isicol;
16529566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work));
16539566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
16549566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
16553ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1656813a1f2bSShri Abhyankar }
1657813a1f2bSShri Abhyankar 
1658d71ae5a4SJacob Faibussowitsch PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1659d71ae5a4SJacob Faibussowitsch {
1660813a1f2bSShri Abhyankar   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1661813a1f2bSShri Abhyankar   IS                 isicol;
1662813a1f2bSShri Abhyankar   const PetscInt    *r, *ic;
16631df811f5SHong Zhang   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1664813a1f2bSShri Abhyankar   PetscInt          *bi, *cols, nnz, *cols_lvl;
1665813a1f2bSShri Abhyankar   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1666813a1f2bSShri Abhyankar   PetscInt           i, levels, diagonal_fill;
1667035f4963SHong Zhang   PetscBool          col_identity, row_identity, missing;
1668813a1f2bSShri Abhyankar   PetscReal          f;
16690298fd71SBarry Smith   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1670813a1f2bSShri Abhyankar   PetscBT            lnkbt;
1671813a1f2bSShri Abhyankar   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
16720298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
16730298fd71SBarry Smith   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1674813a1f2bSShri Abhyankar 
1675813a1f2bSShri Abhyankar   PetscFunctionBegin;
167608401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
16779566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
167828b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1679ad04f41aSHong Zhang 
1680813a1f2bSShri Abhyankar   levels = (PetscInt)info->levels;
16819566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
16829566063dSJacob Faibussowitsch   PetscCall(ISIdentity(iscol, &col_identity));
1683813a1f2bSShri Abhyankar   if (!levels && row_identity && col_identity) {
1684813a1f2bSShri Abhyankar     /* special case: ilu(0) with natural ordering */
16859566063dSJacob Faibussowitsch     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1686ad540459SPierre Jolivet     if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
16873ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
1688813a1f2bSShri Abhyankar   }
1689813a1f2bSShri Abhyankar 
16909566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
16919566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
16929566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
1693813a1f2bSShri Abhyankar 
16941bfa06eaSJed Brown   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
16959566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bi));
16969566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));
1697e60cf9a0SBarry Smith   bi[0] = bdiag[0] = 0;
16989566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1699813a1f2bSShri Abhyankar 
1700813a1f2bSShri Abhyankar   /* create a linked list for storing column indices of the active row */
1701813a1f2bSShri Abhyankar   nlnk = n + 1;
17029566063dSJacob Faibussowitsch   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1703813a1f2bSShri Abhyankar 
1704813a1f2bSShri Abhyankar   /* initial FreeSpace size is f*(ai[n]+1) */
17051df811f5SHong Zhang   f             = info->fill;
17061df811f5SHong Zhang   diagonal_fill = (PetscInt)info->diagonal_fill;
17079566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1708813a1f2bSShri Abhyankar   current_space = free_space;
17099566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1710813a1f2bSShri Abhyankar   current_space_lvl = free_space_lvl;
1711813a1f2bSShri Abhyankar   for (i = 0; i < n; i++) {
1712813a1f2bSShri Abhyankar     nzi = 0;
1713813a1f2bSShri Abhyankar     /* copy current row into linked list */
1714813a1f2bSShri Abhyankar     nnz = ai[r[i] + 1] - ai[r[i]];
171528b400f6SJacob Faibussowitsch     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1716813a1f2bSShri Abhyankar     cols   = aj + ai[r[i]];
1717813a1f2bSShri Abhyankar     lnk[i] = -1; /* marker to indicate if diagonal exists */
17189566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1719813a1f2bSShri Abhyankar     nzi += nlnk;
1720813a1f2bSShri Abhyankar 
1721813a1f2bSShri Abhyankar     /* make sure diagonal entry is included */
1722813a1f2bSShri Abhyankar     if (diagonal_fill && lnk[i] == -1) {
1723813a1f2bSShri Abhyankar       fm = n;
1724813a1f2bSShri Abhyankar       while (lnk[fm] < i) fm = lnk[fm];
1725813a1f2bSShri Abhyankar       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1726813a1f2bSShri Abhyankar       lnk[fm]    = i;
1727813a1f2bSShri Abhyankar       lnk_lvl[i] = 0;
17289371c9d4SSatish Balay       nzi++;
17299371c9d4SSatish Balay       dcount++;
1730813a1f2bSShri Abhyankar     }
1731813a1f2bSShri Abhyankar 
1732813a1f2bSShri Abhyankar     /* add pivot rows into the active row */
1733813a1f2bSShri Abhyankar     nzbd = 0;
1734813a1f2bSShri Abhyankar     prow = lnk[n];
1735813a1f2bSShri Abhyankar     while (prow < i) {
1736813a1f2bSShri Abhyankar       nnz      = bdiag[prow];
1737813a1f2bSShri Abhyankar       cols     = bj_ptr[prow] + nnz + 1;
1738813a1f2bSShri Abhyankar       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1739813a1f2bSShri Abhyankar       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
17409566063dSJacob Faibussowitsch       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1741813a1f2bSShri Abhyankar       nzi += nlnk;
1742813a1f2bSShri Abhyankar       prow = lnk[prow];
1743813a1f2bSShri Abhyankar       nzbd++;
1744813a1f2bSShri Abhyankar     }
1745813a1f2bSShri Abhyankar     bdiag[i]  = nzbd;
1746813a1f2bSShri Abhyankar     bi[i + 1] = bi[i] + nzi;
1747813a1f2bSShri Abhyankar     /* if free space is not available, make more free space */
1748813a1f2bSShri Abhyankar     if (current_space->local_remaining < nzi) {
1749f91af8c7SBarry Smith       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
17509566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
17519566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1752813a1f2bSShri Abhyankar       reallocs++;
1753813a1f2bSShri Abhyankar     }
1754813a1f2bSShri Abhyankar 
1755813a1f2bSShri Abhyankar     /* copy data into free_space and free_space_lvl, then initialize lnk */
17569566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1757813a1f2bSShri Abhyankar     bj_ptr[i]    = current_space->array;
1758813a1f2bSShri Abhyankar     bjlvl_ptr[i] = current_space_lvl->array;
1759813a1f2bSShri Abhyankar 
1760813a1f2bSShri Abhyankar     /* make sure the active row i has diagonal entry */
176108401ef6SPierre Jolivet     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1762813a1f2bSShri Abhyankar 
1763813a1f2bSShri Abhyankar     current_space->array += nzi;
1764813a1f2bSShri Abhyankar     current_space->local_used += nzi;
1765813a1f2bSShri Abhyankar     current_space->local_remaining -= nzi;
1766813a1f2bSShri Abhyankar     current_space_lvl->array += nzi;
1767813a1f2bSShri Abhyankar     current_space_lvl->local_used += nzi;
1768813a1f2bSShri Abhyankar     current_space_lvl->local_remaining -= nzi;
1769813a1f2bSShri Abhyankar   }
1770813a1f2bSShri Abhyankar 
17719566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
17729566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
1773813a1f2bSShri Abhyankar   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
17749566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
17759566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1776813a1f2bSShri Abhyankar 
17779566063dSJacob Faibussowitsch   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
17789566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
17799566063dSJacob Faibussowitsch   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1780813a1f2bSShri Abhyankar 
1781813a1f2bSShri Abhyankar #if defined(PETSC_USE_INFO)
1782813a1f2bSShri Abhyankar   {
1783aef85c9fSShri Abhyankar     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
17849566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
17859566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
17869566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
17879566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "for best performance.\n"));
178848a46eb9SPierre Jolivet     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1789813a1f2bSShri Abhyankar   }
1790813a1f2bSShri Abhyankar #endif
1791813a1f2bSShri Abhyankar   /* put together the new matrix */
17929566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1793813a1f2bSShri Abhyankar   b = (Mat_SeqAIJ *)(fact)->data;
17942205254eSKarl Rupp 
1795813a1f2bSShri Abhyankar   b->free_a       = PETSC_TRUE;
1796813a1f2bSShri Abhyankar   b->free_ij      = PETSC_TRUE;
1797813a1f2bSShri Abhyankar   b->singlemalloc = PETSC_FALSE;
17982205254eSKarl Rupp 
17999566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
18002205254eSKarl Rupp 
1801813a1f2bSShri Abhyankar   b->j    = bj;
1802813a1f2bSShri Abhyankar   b->i    = bi;
1803813a1f2bSShri Abhyankar   b->diag = bdiag;
1804f4259b30SLisandro Dalcin   b->ilen = NULL;
1805f4259b30SLisandro Dalcin   b->imax = NULL;
1806813a1f2bSShri Abhyankar   b->row  = isrow;
1807813a1f2bSShri Abhyankar   b->col  = iscol;
18089566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
18099566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
1810813a1f2bSShri Abhyankar   b->icol = isicol;
18112205254eSKarl Rupp 
18129566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1813813a1f2bSShri Abhyankar   /* In b structure:  Free imax, ilen, old a, old j.
1814813a1f2bSShri Abhyankar      Allocate bdiag, solve_work, new a, new j */
1815f268cba8SShri Abhyankar   b->maxnz = b->nz = bdiag[0] + 1;
18162205254eSKarl Rupp 
1817813a1f2bSShri Abhyankar   (fact)->info.factor_mallocs    = reallocs;
1818813a1f2bSShri Abhyankar   (fact)->info.fill_ratio_given  = f;
1819f268cba8SShri Abhyankar   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
182035233d99SShri Abhyankar   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1821ad540459SPierre Jolivet   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
18229566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
18233ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1824813a1f2bSShri Abhyankar }
1825813a1f2bSShri Abhyankar 
1826ff6a9541SJacob Faibussowitsch #if 0
1827ff6a9541SJacob Faibussowitsch // unused
1828ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1829d71ae5a4SJacob Faibussowitsch {
18306516239fSHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
18316516239fSHong Zhang   IS                 isicol;
18326516239fSHong Zhang   const PetscInt    *r, *ic;
1833ece542f9SHong Zhang   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
18346516239fSHong Zhang   PetscInt          *bi, *cols, nnz, *cols_lvl;
18356516239fSHong Zhang   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
18366516239fSHong Zhang   PetscInt           i, levels, diagonal_fill;
1837ace3abfcSBarry Smith   PetscBool          col_identity, row_identity;
18386516239fSHong Zhang   PetscReal          f;
18390298fd71SBarry Smith   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
18406516239fSHong Zhang   PetscBT            lnkbt;
18416516239fSHong Zhang   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
18420298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
18430298fd71SBarry Smith   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1844ace3abfcSBarry Smith   PetscBool          missing;
18456516239fSHong Zhang 
18466516239fSHong Zhang   PetscFunctionBegin;
184708401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
18489566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
184928b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1850ece542f9SHong Zhang 
18516516239fSHong Zhang   f             = info->fill;
18526516239fSHong Zhang   levels        = (PetscInt)info->levels;
18536516239fSHong Zhang   diagonal_fill = (PetscInt)info->diagonal_fill;
18542205254eSKarl Rupp 
18559566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
18566516239fSHong Zhang 
18579566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
18589566063dSJacob Faibussowitsch   PetscCall(ISIdentity(iscol, &col_identity));
18596516239fSHong Zhang   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
18609566063dSJacob Faibussowitsch     PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE));
18612205254eSKarl Rupp 
1862ad04f41aSHong Zhang     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1863ad540459SPierre Jolivet     if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1864d5f3da31SBarry Smith     fact->factortype               = MAT_FACTOR_ILU;
1865719d5645SBarry Smith     (fact)->info.factor_mallocs    = 0;
1866719d5645SBarry Smith     (fact)->info.fill_ratio_given  = info->fill;
1867719d5645SBarry Smith     (fact)->info.fill_ratio_needed = 1.0;
18682205254eSKarl Rupp 
1869719d5645SBarry Smith     b       = (Mat_SeqAIJ *)(fact)->data;
187008480c60SBarry Smith     b->row  = isrow;
187108480c60SBarry Smith     b->col  = iscol;
187282bf6240SBarry Smith     b->icol = isicol;
18739566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work));
18749566063dSJacob Faibussowitsch     PetscCall(PetscObjectReference((PetscObject)isrow));
18759566063dSJacob Faibussowitsch     PetscCall(PetscObjectReference((PetscObject)iscol));
18763ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
187708480c60SBarry Smith   }
187808480c60SBarry Smith 
18799566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
18809566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
18819e25ed09SBarry Smith 
18821bfa06eaSJed Brown   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
18839566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bi));
18849566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));
1885a6df321fSShri Abhyankar   bi[0] = bdiag[0] = 0;
18866cf997b0SBarry Smith 
18879566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
18885a8e39fbSHong Zhang 
18895a8e39fbSHong Zhang   /* create a linked list for storing column indices of the active row */
18905a8e39fbSHong Zhang   nlnk = n + 1;
18919566063dSJacob Faibussowitsch   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
18925a8e39fbSHong Zhang 
18935a8e39fbSHong Zhang   /* initial FreeSpace size is f*(ai[n]+1) */
18949566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
18955a8e39fbSHong Zhang   current_space = free_space;
18969566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
18975a8e39fbSHong Zhang   current_space_lvl = free_space_lvl;
18985a8e39fbSHong Zhang 
18995a8e39fbSHong Zhang   for (i = 0; i < n; i++) {
19005a8e39fbSHong Zhang     nzi = 0;
19016cf997b0SBarry Smith     /* copy current row into linked list */
19025a8e39fbSHong Zhang     nnz = ai[r[i] + 1] - ai[r[i]];
190328b400f6SJacob Faibussowitsch     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
19045a8e39fbSHong Zhang     cols   = aj + ai[r[i]];
19055a8e39fbSHong Zhang     lnk[i] = -1; /* marker to indicate if diagonal exists */
19069566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
19075a8e39fbSHong Zhang     nzi += nlnk;
19086cf997b0SBarry Smith 
19096cf997b0SBarry Smith     /* make sure diagonal entry is included */
19105a8e39fbSHong Zhang     if (diagonal_fill && lnk[i] == -1) {
19116cf997b0SBarry Smith       fm = n;
19125a8e39fbSHong Zhang       while (lnk[fm] < i) fm = lnk[fm];
19135a8e39fbSHong Zhang       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
19145a8e39fbSHong Zhang       lnk[fm]    = i;
19155a8e39fbSHong Zhang       lnk_lvl[i] = 0;
19169371c9d4SSatish Balay       nzi++;
19179371c9d4SSatish Balay       dcount++;
19186cf997b0SBarry Smith     }
19196cf997b0SBarry Smith 
19205a8e39fbSHong Zhang     /* add pivot rows into the active row */
19215a8e39fbSHong Zhang     nzbd = 0;
19225a8e39fbSHong Zhang     prow = lnk[n];
19235a8e39fbSHong Zhang     while (prow < i) {
19245a8e39fbSHong Zhang       nnz      = bdiag[prow];
19255a8e39fbSHong Zhang       cols     = bj_ptr[prow] + nnz + 1;
19265a8e39fbSHong Zhang       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
19275a8e39fbSHong Zhang       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
19289566063dSJacob Faibussowitsch       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
19295a8e39fbSHong Zhang       nzi += nlnk;
19305a8e39fbSHong Zhang       prow = lnk[prow];
19315a8e39fbSHong Zhang       nzbd++;
193256beaf04SBarry Smith     }
19335a8e39fbSHong Zhang     bdiag[i]  = nzbd;
19345a8e39fbSHong Zhang     bi[i + 1] = bi[i] + nzi;
1935ecf371e4SBarry Smith 
19365a8e39fbSHong Zhang     /* if free space is not available, make more free space */
19375a8e39fbSHong Zhang     if (current_space->local_remaining < nzi) {
1938f91af8c7SBarry Smith       nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
19399566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
19409566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
19415a8e39fbSHong Zhang       reallocs++;
19425a8e39fbSHong Zhang     }
1943ecf371e4SBarry Smith 
19445a8e39fbSHong Zhang     /* copy data into free_space and free_space_lvl, then initialize lnk */
19459566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
19465a8e39fbSHong Zhang     bj_ptr[i]    = current_space->array;
19475a8e39fbSHong Zhang     bjlvl_ptr[i] = current_space_lvl->array;
19485a8e39fbSHong Zhang 
19495a8e39fbSHong Zhang     /* make sure the active row i has diagonal entry */
195008401ef6SPierre Jolivet     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
19515a8e39fbSHong Zhang 
19525a8e39fbSHong Zhang     current_space->array += nzi;
19535a8e39fbSHong Zhang     current_space->local_used += nzi;
19545a8e39fbSHong Zhang     current_space->local_remaining -= nzi;
19555a8e39fbSHong Zhang     current_space_lvl->array += nzi;
19565a8e39fbSHong Zhang     current_space_lvl->local_used += nzi;
19575a8e39fbSHong Zhang     current_space_lvl->local_remaining -= nzi;
19589e25ed09SBarry Smith   }
19595a8e39fbSHong Zhang 
19609566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
19619566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
19625a8e39fbSHong Zhang 
19635a8e39fbSHong Zhang   /* destroy list of free space and other temporary arrays */
19649566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
19659566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */
19669566063dSJacob Faibussowitsch   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
19679566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
19689566063dSJacob Faibussowitsch   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
19699e25ed09SBarry Smith 
19706cf91177SBarry Smith   #if defined(PETSC_USE_INFO)
1971f64065bbSBarry Smith   {
19725a8e39fbSHong Zhang     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
19739566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
19749566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
19759566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
19769566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "for best performance.\n"));
197748a46eb9SPierre Jolivet     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1978f64065bbSBarry Smith   }
197963ba0a88SBarry Smith   #endif
1980bbb6d6a8SBarry Smith 
19819e25ed09SBarry Smith   /* put together the new matrix */
19829566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1983719d5645SBarry Smith   b = (Mat_SeqAIJ *)(fact)->data;
19842205254eSKarl Rupp 
1985e6b907acSBarry Smith   b->free_a       = PETSC_TRUE;
1986e6b907acSBarry Smith   b->free_ij      = PETSC_TRUE;
19877c922b88SBarry Smith   b->singlemalloc = PETSC_FALSE;
19882205254eSKarl Rupp 
19899566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[n], &b->a));
19905a8e39fbSHong Zhang   b->j = bj;
19915a8e39fbSHong Zhang   b->i = bi;
19925a8e39fbSHong Zhang   for (i = 0; i < n; i++) bdiag[i] += bi[i];
19935a8e39fbSHong Zhang   b->diag = bdiag;
1994f4259b30SLisandro Dalcin   b->ilen = NULL;
1995f4259b30SLisandro Dalcin   b->imax = NULL;
1996416022c9SBarry Smith   b->row  = isrow;
1997416022c9SBarry Smith   b->col  = iscol;
19989566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
19999566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
200082bf6240SBarry Smith   b->icol = isicol;
20019566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
2002416022c9SBarry Smith   /* In b structure:  Free imax, ilen, old a, old j.
20035a8e39fbSHong Zhang      Allocate bdiag, solve_work, new a, new j */
20045a8e39fbSHong Zhang   b->maxnz = b->nz = bi[n];
20052205254eSKarl Rupp 
2006719d5645SBarry Smith   (fact)->info.factor_mallocs    = reallocs;
2007719d5645SBarry Smith   (fact)->info.fill_ratio_given  = f;
2008719d5645SBarry Smith   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2009ad04f41aSHong Zhang   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ_inplace;
2010ad540459SPierre Jolivet   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
20113ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
20129e25ed09SBarry Smith }
2013ff6a9541SJacob Faibussowitsch #endif
2014d5d45c9bSBarry Smith 
2015d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
2016d71ae5a4SJacob Faibussowitsch {
20175f44c854SHong Zhang   Mat             C  = B;
20185f44c854SHong Zhang   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
20195f44c854SHong Zhang   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
20205f44c854SHong Zhang   IS              ip = b->row, iip = b->icol;
20215f44c854SHong Zhang   const PetscInt *rip, *riip;
20225f44c854SHong Zhang   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
20235f44c854SHong Zhang   PetscInt       *ai = a->i, *aj = a->j;
20245f44c854SHong Zhang   PetscInt        k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
20255f44c854SHong Zhang   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2026ace3abfcSBarry Smith   PetscBool       perm_identity;
2027d90ac83dSHong Zhang   FactorShiftCtx  sctx;
202898a93161SHong Zhang   PetscReal       rs;
202998a93161SHong Zhang   MatScalar       d, *v;
203098a93161SHong Zhang 
20315f44c854SHong Zhang   PetscFunctionBegin;
203298a93161SHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
20339566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
203498a93161SHong Zhang 
2035f4db908eSBarry Smith   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
203698a93161SHong Zhang     sctx.shift_top = info->zeropivot;
203798a93161SHong Zhang     for (i = 0; i < mbs; i++) {
203898a93161SHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
203998a93161SHong Zhang       d  = (aa)[a->diag[i]];
204098a93161SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
204198a93161SHong Zhang       v  = aa + ai[i];
204298a93161SHong Zhang       nz = ai[i + 1] - ai[i];
20432205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
204498a93161SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
204598a93161SHong Zhang     }
204698a93161SHong Zhang     sctx.shift_top *= 1.1;
204798a93161SHong Zhang     sctx.nshift_max = 5;
204898a93161SHong Zhang     sctx.shift_lo   = 0.;
204998a93161SHong Zhang     sctx.shift_hi   = 1.;
205098a93161SHong Zhang   }
20515f44c854SHong Zhang 
20529566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(ip, &rip));
20539566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iip, &riip));
20545f44c854SHong Zhang 
20555f44c854SHong Zhang   /* allocate working arrays
20565f44c854SHong Zhang      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
20575f44c854SHong Zhang      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
20585f44c854SHong Zhang   */
20599566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
20605f44c854SHong Zhang 
206198a93161SHong Zhang   do {
206207b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
206398a93161SHong Zhang 
2064c5546cabSHong Zhang     for (i = 0; i < mbs; i++) c2r[i] = mbs;
20652e987568SSatish Balay     if (mbs) il[0] = 0;
20665f44c854SHong Zhang 
20675f44c854SHong Zhang     for (k = 0; k < mbs; k++) {
20685f44c854SHong Zhang       /* zero rtmp */
20695f44c854SHong Zhang       nz    = bi[k + 1] - bi[k];
20705f44c854SHong Zhang       bjtmp = bj + bi[k];
20715f44c854SHong Zhang       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
20725f44c854SHong Zhang 
20735f44c854SHong Zhang       /* load in initial unfactored row */
20745f44c854SHong Zhang       bval = ba + bi[k];
20759371c9d4SSatish Balay       jmin = ai[rip[k]];
20769371c9d4SSatish Balay       jmax = ai[rip[k] + 1];
20775f44c854SHong Zhang       for (j = jmin; j < jmax; j++) {
20785f44c854SHong Zhang         col = riip[aj[j]];
20795f44c854SHong Zhang         if (col >= k) { /* only take upper triangular entry */
20805f44c854SHong Zhang           rtmp[col] = aa[j];
20815f44c854SHong Zhang           *bval++   = 0.0; /* for in-place factorization */
20825f44c854SHong Zhang         }
20835f44c854SHong Zhang       }
208498a93161SHong Zhang       /* shift the diagonal of the matrix: ZeropivotApply() */
208598a93161SHong Zhang       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
20865f44c854SHong Zhang 
20875f44c854SHong Zhang       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
20885f44c854SHong Zhang       dk = rtmp[k];
20895f44c854SHong Zhang       i  = c2r[k]; /* first row to be added to k_th row  */
20905f44c854SHong Zhang 
20915f44c854SHong Zhang       while (i < k) {
20925f44c854SHong Zhang         nexti = c2r[i]; /* next row to be added to k_th row */
20935f44c854SHong Zhang 
20945f44c854SHong Zhang         /* compute multiplier, update diag(k) and U(i,k) */
20955f44c854SHong Zhang         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
20965f44c854SHong Zhang         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
20975f44c854SHong Zhang         dk += uikdi * ba[ili];           /* update diag[k] */
20985f44c854SHong Zhang         ba[ili] = uikdi;                 /* -U(i,k) */
20995f44c854SHong Zhang 
21005f44c854SHong Zhang         /* add multiple of row i to k-th row */
21019371c9d4SSatish Balay         jmin = ili + 1;
21029371c9d4SSatish Balay         jmax = bi[i + 1];
21035f44c854SHong Zhang         if (jmin < jmax) {
21045f44c854SHong Zhang           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
21055f44c854SHong Zhang           /* update il and c2r for row i */
21065f44c854SHong Zhang           il[i]  = jmin;
21079371c9d4SSatish Balay           j      = bj[jmin];
21089371c9d4SSatish Balay           c2r[i] = c2r[j];
21099371c9d4SSatish Balay           c2r[j] = i;
21105f44c854SHong Zhang         }
21115f44c854SHong Zhang         i = nexti;
21125f44c854SHong Zhang       }
21135f44c854SHong Zhang 
21145f44c854SHong Zhang       /* copy data into U(k,:) */
211598a93161SHong Zhang       rs   = 0.0;
21169371c9d4SSatish Balay       jmin = bi[k];
21179371c9d4SSatish Balay       jmax = bi[k + 1] - 1;
21185f44c854SHong Zhang       if (jmin < jmax) {
21195f44c854SHong Zhang         for (j = jmin; j < jmax; j++) {
21209371c9d4SSatish Balay           col   = bj[j];
21219371c9d4SSatish Balay           ba[j] = rtmp[col];
21229371c9d4SSatish Balay           rs += PetscAbsScalar(ba[j]);
21235f44c854SHong Zhang         }
21245f44c854SHong Zhang         /* add the k-th row into il and c2r */
21255f44c854SHong Zhang         il[k]  = jmin;
21269371c9d4SSatish Balay         i      = bj[jmin];
21279371c9d4SSatish Balay         c2r[k] = c2r[i];
21289371c9d4SSatish Balay         c2r[i] = k;
21295f44c854SHong Zhang       }
213098a93161SHong Zhang 
213198a93161SHong Zhang       /* MatPivotCheck() */
213298a93161SHong Zhang       sctx.rs = rs;
213398a93161SHong Zhang       sctx.pv = dk;
21349566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
213507b50cabSHong Zhang       if (sctx.newshift) break;
213698a93161SHong Zhang       dk = sctx.pv;
213798a93161SHong Zhang 
213898a93161SHong Zhang       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
213998a93161SHong Zhang     }
214007b50cabSHong Zhang   } while (sctx.newshift);
21415f44c854SHong Zhang 
21429566063dSJacob Faibussowitsch   PetscCall(PetscFree3(rtmp, il, c2r));
21439566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(ip, &rip));
21449566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iip, &riip));
21455f44c854SHong Zhang 
21469566063dSJacob Faibussowitsch   PetscCall(ISIdentity(ip, &perm_identity));
21475f44c854SHong Zhang   if (perm_identity) {
214835233d99SShri Abhyankar     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
214935233d99SShri Abhyankar     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
21502169352eSHong Zhang     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
21512169352eSHong Zhang     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
21525f44c854SHong Zhang   } else {
215335233d99SShri Abhyankar     B->ops->solve          = MatSolve_SeqSBAIJ_1;
215435233d99SShri Abhyankar     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
215535233d99SShri Abhyankar     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
215635233d99SShri Abhyankar     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
21575f44c854SHong Zhang   }
21585f44c854SHong Zhang 
21595f44c854SHong Zhang   C->assembled    = PETSC_TRUE;
21605f44c854SHong Zhang   C->preallocated = PETSC_TRUE;
21612205254eSKarl Rupp 
21629566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->rmap->n));
216398a93161SHong Zhang 
216498a93161SHong Zhang   /* MatPivotView() */
216598a93161SHong Zhang   if (sctx.nshift) {
2166f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
21679566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
2168f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
21699566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2170f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
21719566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
217298a93161SHong Zhang     }
217398a93161SHong Zhang   }
21743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
21755f44c854SHong Zhang }
21765f44c854SHong Zhang 
2177d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
2178d71ae5a4SJacob Faibussowitsch {
2179719d5645SBarry Smith   Mat             C  = B;
21800968510aSHong Zhang   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
21812ed133dbSHong Zhang   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
21829bfd6278SHong Zhang   IS              ip = b->row, iip = b->icol;
21835d0c19d7SBarry Smith   const PetscInt *rip, *riip;
2184c5546cabSHong Zhang   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
21852ed133dbSHong Zhang   PetscInt       *ai = a->i, *aj = a->j;
21862ed133dbSHong Zhang   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2187622e2028SHong Zhang   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2188ace3abfcSBarry Smith   PetscBool       perm_identity;
21890e95ead3SHong Zhang   FactorShiftCtx  sctx;
21900e95ead3SHong Zhang   PetscReal       rs;
21910e95ead3SHong Zhang   MatScalar       d, *v;
2192d5d45c9bSBarry Smith 
2193a6175056SHong Zhang   PetscFunctionBegin;
21940e95ead3SHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
21959566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
21960e95ead3SHong Zhang 
21970e95ead3SHong Zhang   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
21980e95ead3SHong Zhang     sctx.shift_top = info->zeropivot;
21990e95ead3SHong Zhang     for (i = 0; i < mbs; i++) {
22000e95ead3SHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
22010e95ead3SHong Zhang       d  = (aa)[a->diag[i]];
22020e95ead3SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
22030e95ead3SHong Zhang       v  = aa + ai[i];
22040e95ead3SHong Zhang       nz = ai[i + 1] - ai[i];
22052205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
22060e95ead3SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
22070e95ead3SHong Zhang     }
22080e95ead3SHong Zhang     sctx.shift_top *= 1.1;
22090e95ead3SHong Zhang     sctx.nshift_max = 5;
22100e95ead3SHong Zhang     sctx.shift_lo   = 0.;
22110e95ead3SHong Zhang     sctx.shift_hi   = 1.;
22120e95ead3SHong Zhang   }
2213ee45ca4aSHong Zhang 
22149566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(ip, &rip));
22159566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iip, &riip));
22162ed133dbSHong Zhang 
22172ed133dbSHong Zhang   /* initialization */
22189566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
22190e95ead3SHong Zhang 
22202ed133dbSHong Zhang   do {
222107b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
22220e95ead3SHong Zhang 
2223c5546cabSHong Zhang     for (i = 0; i < mbs; i++) jl[i] = mbs;
2224c5546cabSHong Zhang     il[0] = 0;
22252ed133dbSHong Zhang 
22262ed133dbSHong Zhang     for (k = 0; k < mbs; k++) {
2227c5546cabSHong Zhang       /* zero rtmp */
2228c5546cabSHong Zhang       nz    = bi[k + 1] - bi[k];
2229c5546cabSHong Zhang       bjtmp = bj + bi[k];
2230c5546cabSHong Zhang       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2231c5546cabSHong Zhang 
2232c05c3958SHong Zhang       bval = ba + bi[k];
22332ed133dbSHong Zhang       /* initialize k-th row by the perm[k]-th row of A */
22349371c9d4SSatish Balay       jmin = ai[rip[k]];
22359371c9d4SSatish Balay       jmax = ai[rip[k] + 1];
22362ed133dbSHong Zhang       for (j = jmin; j < jmax; j++) {
22379bfd6278SHong Zhang         col = riip[aj[j]];
22382ed133dbSHong Zhang         if (col >= k) { /* only take upper triangular entry */
22392ed133dbSHong Zhang           rtmp[col] = aa[j];
2240c05c3958SHong Zhang           *bval++   = 0.0; /* for in-place factorization */
22412ed133dbSHong Zhang         }
22422ed133dbSHong Zhang       }
224339e3d630SHong Zhang       /* shift the diagonal of the matrix */
2244540703acSHong Zhang       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
22452ed133dbSHong Zhang 
22462ed133dbSHong Zhang       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
22472ed133dbSHong Zhang       dk = rtmp[k];
22482ed133dbSHong Zhang       i  = jl[k]; /* first row to be added to k_th row  */
22492ed133dbSHong Zhang 
22502ed133dbSHong Zhang       while (i < k) {
22512ed133dbSHong Zhang         nexti = jl[i]; /* next row to be added to k_th row */
22522ed133dbSHong Zhang 
22532ed133dbSHong Zhang         /* compute multiplier, update diag(k) and U(i,k) */
22542ed133dbSHong Zhang         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
22552ed133dbSHong Zhang         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
22562ed133dbSHong Zhang         dk += uikdi * ba[ili];
22572ed133dbSHong Zhang         ba[ili] = uikdi; /* -U(i,k) */
22582ed133dbSHong Zhang 
22592ed133dbSHong Zhang         /* add multiple of row i to k-th row */
22609371c9d4SSatish Balay         jmin = ili + 1;
22619371c9d4SSatish Balay         jmax = bi[i + 1];
22622ed133dbSHong Zhang         if (jmin < jmax) {
22632ed133dbSHong Zhang           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
22642ed133dbSHong Zhang           /* update il and jl for row i */
22652ed133dbSHong Zhang           il[i] = jmin;
22669371c9d4SSatish Balay           j     = bj[jmin];
22679371c9d4SSatish Balay           jl[i] = jl[j];
22689371c9d4SSatish Balay           jl[j] = i;
22692ed133dbSHong Zhang         }
22702ed133dbSHong Zhang         i = nexti;
22712ed133dbSHong Zhang       }
22722ed133dbSHong Zhang 
22730697b6caSHong Zhang       /* shift the diagonals when zero pivot is detected */
22740697b6caSHong Zhang       /* compute rs=sum of abs(off-diagonal) */
22750697b6caSHong Zhang       rs   = 0.0;
22763c9e8598SHong Zhang       jmin = bi[k] + 1;
22773c9e8598SHong Zhang       nz   = bi[k + 1] - jmin;
22783c9e8598SHong Zhang       bcol = bj + jmin;
2279ad540459SPierre Jolivet       for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);
2280540703acSHong Zhang 
2281540703acSHong Zhang       sctx.rs = rs;
2282540703acSHong Zhang       sctx.pv = dk;
22839566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
228407b50cabSHong Zhang       if (sctx.newshift) break;
22850e95ead3SHong Zhang       dk = sctx.pv;
22863c9e8598SHong Zhang 
22873c9e8598SHong Zhang       /* copy data into U(k,:) */
228839e3d630SHong Zhang       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
22899371c9d4SSatish Balay       jmin      = bi[k] + 1;
22909371c9d4SSatish Balay       jmax      = bi[k + 1];
229139e3d630SHong Zhang       if (jmin < jmax) {
229239e3d630SHong Zhang         for (j = jmin; j < jmax; j++) {
22939371c9d4SSatish Balay           col   = bj[j];
22949371c9d4SSatish Balay           ba[j] = rtmp[col];
22953c9e8598SHong Zhang         }
229639e3d630SHong Zhang         /* add the k-th row into il and jl */
22973c9e8598SHong Zhang         il[k] = jmin;
22989371c9d4SSatish Balay         i     = bj[jmin];
22999371c9d4SSatish Balay         jl[k] = jl[i];
23009371c9d4SSatish Balay         jl[i] = k;
23013c9e8598SHong Zhang       }
23023c9e8598SHong Zhang     }
230307b50cabSHong Zhang   } while (sctx.newshift);
23040e95ead3SHong Zhang 
23059566063dSJacob Faibussowitsch   PetscCall(PetscFree3(rtmp, il, jl));
23069566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(ip, &rip));
23079566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iip, &riip));
2308db4efbfdSBarry Smith 
23099566063dSJacob Faibussowitsch   PetscCall(ISIdentity(ip, &perm_identity));
2310db4efbfdSBarry Smith   if (perm_identity) {
23110a3c351aSHong Zhang     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
23120a3c351aSHong Zhang     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
23130a3c351aSHong Zhang     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
23140a3c351aSHong Zhang     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2315db4efbfdSBarry Smith   } else {
23160a3c351aSHong Zhang     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
23170a3c351aSHong Zhang     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
23180a3c351aSHong Zhang     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
23190a3c351aSHong Zhang     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2320db4efbfdSBarry Smith   }
2321db4efbfdSBarry Smith 
23223c9e8598SHong Zhang   C->assembled    = PETSC_TRUE;
23233c9e8598SHong Zhang   C->preallocated = PETSC_TRUE;
23242205254eSKarl Rupp 
23259566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->rmap->n));
2326540703acSHong Zhang   if (sctx.nshift) {
2327f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
23289566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2329f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
23309566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
23310697b6caSHong Zhang     }
23323c9e8598SHong Zhang   }
23333ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
23343c9e8598SHong Zhang }
2335a6175056SHong Zhang 
23368a76ed4fSHong Zhang /*
23378a76ed4fSHong Zhang    icc() under revised new data structure.
23388a76ed4fSHong Zhang    Factored arrays bj and ba are stored as
23398a76ed4fSHong Zhang      U(0,:),...,U(i,:),U(n-1,:)
23408a76ed4fSHong Zhang 
23418a76ed4fSHong Zhang    ui=fact->i is an array of size n+1, in which
23428a76ed4fSHong Zhang    ui+
23438a76ed4fSHong Zhang      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
23448a76ed4fSHong Zhang      ui[n]:  points to U(n-1,n-1)+1
23458a76ed4fSHong Zhang 
23468a76ed4fSHong Zhang   udiag=fact->diag is an array of size n,in which
23478a76ed4fSHong Zhang      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
23488a76ed4fSHong Zhang 
23498a76ed4fSHong Zhang    U(i,:) contains udiag[i] as its last entry, i.e.,
23508a76ed4fSHong Zhang     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
23518a76ed4fSHong Zhang */
23528a76ed4fSHong Zhang 
2353d71ae5a4SJacob Faibussowitsch PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2354d71ae5a4SJacob Faibussowitsch {
23550968510aSHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2356ed59401aSHong Zhang   Mat_SeqSBAIJ      *b;
2357ace3abfcSBarry Smith   PetscBool          perm_identity, missing;
23585f44c854SHong Zhang   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
23595d0c19d7SBarry Smith   const PetscInt    *rip, *riip;
2360ed59401aSHong Zhang   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
23610298fd71SBarry Smith   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
23625a8e39fbSHong Zhang   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2363ed59401aSHong Zhang   PetscReal          fill = info->fill, levels = info->levels;
23640298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
23650298fd71SBarry Smith   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
23667a48dd6fSHong Zhang   PetscBT            lnkbt;
2367b635d36bSHong Zhang   IS                 iperm;
2368a6175056SHong Zhang 
2369a6175056SHong Zhang   PetscFunctionBegin;
237008401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
23719566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &d));
237228b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
23739566063dSJacob Faibussowitsch   PetscCall(ISIdentity(perm, &perm_identity));
23749566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
23757a48dd6fSHong Zhang 
23769566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &ui));
23779566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &udiag));
237839e3d630SHong Zhang   ui[0] = 0;
237939e3d630SHong Zhang 
23808a76ed4fSHong Zhang   /* ICC(0) without matrix ordering: simply rearrange column indices */
2381622e2028SHong Zhang   if (!levels && perm_identity) {
23825f44c854SHong Zhang     for (i = 0; i < am; i++) {
2383c5546cabSHong Zhang       ncols     = ai[i + 1] - a->diag[i];
2384c5546cabSHong Zhang       ui[i + 1] = ui[i] + ncols;
2385c5546cabSHong Zhang       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2386dc1e2a3fSHong Zhang     }
23879566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2388dc1e2a3fSHong Zhang     cols = uj;
2389dc1e2a3fSHong Zhang     for (i = 0; i < am; i++) {
23905f44c854SHong Zhang       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2391dc1e2a3fSHong Zhang       ncols = ai[i + 1] - a->diag[i] - 1;
23925f44c854SHong Zhang       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2393910cf402Sprj-       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
23945f44c854SHong Zhang     }
23955f44c854SHong Zhang   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
23969566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(iperm, &riip));
23979566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(perm, &rip));
23985f44c854SHong Zhang 
23995f44c854SHong Zhang     /* initialization */
24009566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(am + 1, &ajtmp));
24015f44c854SHong Zhang 
24025f44c854SHong Zhang     /* jl: linked list for storing indices of the pivot rows
24035f44c854SHong Zhang        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
24049566063dSJacob Faibussowitsch     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
24055f44c854SHong Zhang     for (i = 0; i < am; i++) {
24069371c9d4SSatish Balay       jl[i] = am;
24079371c9d4SSatish Balay       il[i] = 0;
24085f44c854SHong Zhang     }
24095f44c854SHong Zhang 
24105f44c854SHong Zhang     /* create and initialize a linked list for storing column indices of the active row k */
24115f44c854SHong Zhang     nlnk = am + 1;
24129566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
24135f44c854SHong Zhang 
241495b5ac22SHong Zhang     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
24159566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
24165f44c854SHong Zhang     current_space = free_space;
24179566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
24185f44c854SHong Zhang     current_space_lvl = free_space_lvl;
24195f44c854SHong Zhang 
24205f44c854SHong Zhang     for (k = 0; k < am; k++) { /* for each active row k */
24215f44c854SHong Zhang       /* initialize lnk by the column indices of row rip[k] of A */
24225f44c854SHong Zhang       nzk   = 0;
24235f44c854SHong Zhang       ncols = ai[rip[k] + 1] - ai[rip[k]];
242428b400f6SJacob Faibussowitsch       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
24255f44c854SHong Zhang       ncols_upper = 0;
24265f44c854SHong Zhang       for (j = 0; j < ncols; j++) {
24275f44c854SHong Zhang         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
24285f44c854SHong Zhang         if (riip[i] >= k) {         /* only take upper triangular entry */
24295f44c854SHong Zhang           ajtmp[ncols_upper] = i;
24305f44c854SHong Zhang           ncols_upper++;
24315f44c854SHong Zhang         }
24325f44c854SHong Zhang       }
24339566063dSJacob Faibussowitsch       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
24345f44c854SHong Zhang       nzk += nlnk;
24355f44c854SHong Zhang 
24365f44c854SHong Zhang       /* update lnk by computing fill-in for each pivot row to be merged in */
24375f44c854SHong Zhang       prow = jl[k]; /* 1st pivot row */
24385f44c854SHong Zhang 
24395f44c854SHong Zhang       while (prow < k) {
24405f44c854SHong Zhang         nextprow = jl[prow];
24415f44c854SHong Zhang 
24425f44c854SHong Zhang         /* merge prow into k-th row */
24435f44c854SHong Zhang         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
24445f44c854SHong Zhang         jmax  = ui[prow + 1];
24455f44c854SHong Zhang         ncols = jmax - jmin;
24465f44c854SHong Zhang         i     = jmin - ui[prow];
24475f44c854SHong Zhang         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
24485f44c854SHong Zhang         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
24495f44c854SHong Zhang         j     = *(uj - 1);
24509566063dSJacob Faibussowitsch         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
24515f44c854SHong Zhang         nzk += nlnk;
24525f44c854SHong Zhang 
24535f44c854SHong Zhang         /* update il and jl for prow */
24545f44c854SHong Zhang         if (jmin < jmax) {
24555f44c854SHong Zhang           il[prow] = jmin;
24569371c9d4SSatish Balay           j        = *cols;
24579371c9d4SSatish Balay           jl[prow] = jl[j];
24589371c9d4SSatish Balay           jl[j]    = prow;
24595f44c854SHong Zhang         }
24605f44c854SHong Zhang         prow = nextprow;
24615f44c854SHong Zhang       }
24625f44c854SHong Zhang 
24635f44c854SHong Zhang       /* if free space is not available, make more free space */
24645f44c854SHong Zhang       if (current_space->local_remaining < nzk) {
24655f44c854SHong Zhang         i = am - k + 1;                                    /* num of unfactored rows */
2466f91af8c7SBarry Smith         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
24679566063dSJacob Faibussowitsch         PetscCall(PetscFreeSpaceGet(i, &current_space));
24689566063dSJacob Faibussowitsch         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
24695f44c854SHong Zhang         reallocs++;
24705f44c854SHong Zhang       }
24715f44c854SHong Zhang 
24725f44c854SHong Zhang       /* copy data into free_space and free_space_lvl, then initialize lnk */
247308401ef6SPierre Jolivet       PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
24749566063dSJacob Faibussowitsch       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
24755f44c854SHong Zhang 
24765f44c854SHong Zhang       /* add the k-th row into il and jl */
24775f44c854SHong Zhang       if (nzk > 1) {
24785f44c854SHong Zhang         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
24799371c9d4SSatish Balay         jl[k] = jl[i];
24809371c9d4SSatish Balay         jl[i] = k;
24815f44c854SHong Zhang         il[k] = ui[k] + 1;
24825f44c854SHong Zhang       }
24835f44c854SHong Zhang       uj_ptr[k]     = current_space->array;
24845f44c854SHong Zhang       uj_lvl_ptr[k] = current_space_lvl->array;
24855f44c854SHong Zhang 
24865f44c854SHong Zhang       current_space->array += nzk;
24875f44c854SHong Zhang       current_space->local_used += nzk;
24885f44c854SHong Zhang       current_space->local_remaining -= nzk;
24895f44c854SHong Zhang 
24905f44c854SHong Zhang       current_space_lvl->array += nzk;
24915f44c854SHong Zhang       current_space_lvl->local_used += nzk;
24925f44c854SHong Zhang       current_space_lvl->local_remaining -= nzk;
24935f44c854SHong Zhang 
24945f44c854SHong Zhang       ui[k + 1] = ui[k] + nzk;
24955f44c854SHong Zhang     }
24965f44c854SHong Zhang 
24979566063dSJacob Faibussowitsch     PetscCall(ISRestoreIndices(perm, &rip));
24989566063dSJacob Faibussowitsch     PetscCall(ISRestoreIndices(iperm, &riip));
24999566063dSJacob Faibussowitsch     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
25009566063dSJacob Faibussowitsch     PetscCall(PetscFree(ajtmp));
25015f44c854SHong Zhang 
25029263d837SHong Zhang     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
25039566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
25049566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor  */
25059566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
25069566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
25075f44c854SHong Zhang 
25085f44c854SHong Zhang   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
25095f44c854SHong Zhang 
25105f44c854SHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
25115f44c854SHong Zhang   b               = (Mat_SeqSBAIJ *)(fact)->data;
25125f44c854SHong Zhang   b->singlemalloc = PETSC_FALSE;
25132205254eSKarl Rupp 
25149566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
25152205254eSKarl Rupp 
25165f44c854SHong Zhang   b->j         = uj;
25175f44c854SHong Zhang   b->i         = ui;
25185f44c854SHong Zhang   b->diag      = udiag;
25197f53bb6cSHong Zhang   b->free_diag = PETSC_TRUE;
2520f4259b30SLisandro Dalcin   b->ilen      = NULL;
2521f4259b30SLisandro Dalcin   b->imax      = NULL;
25225f44c854SHong Zhang   b->row       = perm;
25235f44c854SHong Zhang   b->col       = perm;
25249566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
25259566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
25265f44c854SHong Zhang   b->icol          = iperm;
25275f44c854SHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
25282205254eSKarl Rupp 
25299566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
25302205254eSKarl Rupp 
25315f44c854SHong Zhang   b->maxnz = b->nz = ui[am];
25325f44c854SHong Zhang   b->free_a        = PETSC_TRUE;
25335f44c854SHong Zhang   b->free_ij       = PETSC_TRUE;
25345f44c854SHong Zhang 
2535f284f12aSHong Zhang   fact->info.factor_mallocs   = reallocs;
2536f284f12aSHong Zhang   fact->info.fill_ratio_given = fill;
25375f44c854SHong Zhang   if (ai[am] != 0) {
25386a69fef8SHong Zhang     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
253995b5ac22SHong Zhang     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
25405f44c854SHong Zhang   } else {
2541f284f12aSHong Zhang     fact->info.fill_ratio_needed = 0.0;
25425f44c854SHong Zhang   }
25439263d837SHong Zhang #if defined(PETSC_USE_INFO)
25449263d837SHong Zhang   if (ai[am] != 0) {
25459263d837SHong Zhang     PetscReal af = fact->info.fill_ratio_needed;
25469566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
25479566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
25489566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
25499263d837SHong Zhang   } else {
25509566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
25519263d837SHong Zhang   }
25529263d837SHong Zhang #endif
255335233d99SShri Abhyankar   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
25543ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25555f44c854SHong Zhang }
25565f44c854SHong Zhang 
2557ff6a9541SJacob Faibussowitsch #if 0
2558ff6a9541SJacob Faibussowitsch // unused
2559ff6a9541SJacob Faibussowitsch static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2560d71ae5a4SJacob Faibussowitsch {
25615f44c854SHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
25625f44c854SHong Zhang   Mat_SeqSBAIJ      *b;
2563ace3abfcSBarry Smith   PetscBool          perm_identity, missing;
25645f44c854SHong Zhang   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
25655f44c854SHong Zhang   const PetscInt    *rip, *riip;
25665f44c854SHong Zhang   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
25670298fd71SBarry Smith   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
25685f44c854SHong Zhang   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
25695f44c854SHong Zhang   PetscReal          fill = info->fill, levels = info->levels;
25700298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
25710298fd71SBarry Smith   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
25725f44c854SHong Zhang   PetscBT            lnkbt;
25735f44c854SHong Zhang   IS                 iperm;
25745f44c854SHong Zhang 
25755f44c854SHong Zhang   PetscFunctionBegin;
257608401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
25779566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &d));
257828b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
25799566063dSJacob Faibussowitsch   PetscCall(ISIdentity(perm, &perm_identity));
25809566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
25815f44c854SHong Zhang 
25829566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &ui));
25839566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &udiag));
25845f44c854SHong Zhang   ui[0] = 0;
25855f44c854SHong Zhang 
25865f44c854SHong Zhang   /* ICC(0) without matrix ordering: simply copies fill pattern */
25875f44c854SHong Zhang   if (!levels && perm_identity) {
25885f44c854SHong Zhang     for (i = 0; i < am; i++) {
25895f44c854SHong Zhang       ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
25905f44c854SHong Zhang       udiag[i]  = ui[i];
2591ed59401aSHong Zhang     }
25929566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
259339e3d630SHong Zhang     cols = uj;
2594ed59401aSHong Zhang     for (i = 0; i < am; i++) {
2595ed59401aSHong Zhang       aj    = a->j + a->diag[i];
259639e3d630SHong Zhang       ncols = ui[i + 1] - ui[i];
259739e3d630SHong Zhang       for (j = 0; j < ncols; j++) *cols++ = *aj++;
2598ed59401aSHong Zhang     }
259939e3d630SHong Zhang   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
26009566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(iperm, &riip));
26019566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(perm, &rip));
2602b635d36bSHong Zhang 
26037a48dd6fSHong Zhang     /* initialization */
26049566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(am + 1, &ajtmp));
26057a48dd6fSHong Zhang 
26067a48dd6fSHong Zhang     /* jl: linked list for storing indices of the pivot rows
26077a48dd6fSHong Zhang        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
26089566063dSJacob Faibussowitsch     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
26097a48dd6fSHong Zhang     for (i = 0; i < am; i++) {
26109371c9d4SSatish Balay       jl[i] = am;
26119371c9d4SSatish Balay       il[i] = 0;
26127a48dd6fSHong Zhang     }
26137a48dd6fSHong Zhang 
26147a48dd6fSHong Zhang     /* create and initialize a linked list for storing column indices of the active row k */
26157a48dd6fSHong Zhang     nlnk = am + 1;
26169566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
26177a48dd6fSHong Zhang 
26187a48dd6fSHong Zhang     /* initial FreeSpace size is fill*(ai[am]+1) */
26199566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
26207a48dd6fSHong Zhang     current_space = free_space;
26219566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl));
26227a48dd6fSHong Zhang     current_space_lvl = free_space_lvl;
26237a48dd6fSHong Zhang 
26247a48dd6fSHong Zhang     for (k = 0; k < am; k++) { /* for each active row k */
26257a48dd6fSHong Zhang       /* initialize lnk by the column indices of row rip[k] of A */
26267a48dd6fSHong Zhang       nzk   = 0;
2627622e2028SHong Zhang       ncols = ai[rip[k] + 1] - ai[rip[k]];
262828b400f6SJacob Faibussowitsch       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2629622e2028SHong Zhang       ncols_upper = 0;
2630622e2028SHong Zhang       for (j = 0; j < ncols; j++) {
2631b635d36bSHong Zhang         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2632b635d36bSHong Zhang         if (riip[i] >= k) {         /* only take upper triangular entry */
26335a8e39fbSHong Zhang           ajtmp[ncols_upper] = i;
2634622e2028SHong Zhang           ncols_upper++;
2635622e2028SHong Zhang         }
2636622e2028SHong Zhang       }
26379566063dSJacob Faibussowitsch       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
26387a48dd6fSHong Zhang       nzk += nlnk;
26397a48dd6fSHong Zhang 
26407a48dd6fSHong Zhang       /* update lnk by computing fill-in for each pivot row to be merged in */
26417a48dd6fSHong Zhang       prow = jl[k]; /* 1st pivot row */
26427a48dd6fSHong Zhang 
26437a48dd6fSHong Zhang       while (prow < k) {
26447a48dd6fSHong Zhang         nextprow = jl[prow];
26457a48dd6fSHong Zhang 
26467a48dd6fSHong Zhang         /* merge prow into k-th row */
26477a48dd6fSHong Zhang         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
26487a48dd6fSHong Zhang         jmax  = ui[prow + 1];
26497a48dd6fSHong Zhang         ncols = jmax - jmin;
2650ed59401aSHong Zhang         i     = jmin - ui[prow];
2651ed59401aSHong Zhang         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
26525a8e39fbSHong Zhang         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
26535a8e39fbSHong Zhang         j     = *(uj - 1);
26549566063dSJacob Faibussowitsch         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
26557a48dd6fSHong Zhang         nzk += nlnk;
26567a48dd6fSHong Zhang 
26577a48dd6fSHong Zhang         /* update il and jl for prow */
26587a48dd6fSHong Zhang         if (jmin < jmax) {
26597a48dd6fSHong Zhang           il[prow] = jmin;
26609371c9d4SSatish Balay           j        = *cols;
26619371c9d4SSatish Balay           jl[prow] = jl[j];
26629371c9d4SSatish Balay           jl[j]    = prow;
26637a48dd6fSHong Zhang         }
26647a48dd6fSHong Zhang         prow = nextprow;
26657a48dd6fSHong Zhang       }
26667a48dd6fSHong Zhang 
26677a48dd6fSHong Zhang       /* if free space is not available, make more free space */
26687a48dd6fSHong Zhang       if (current_space->local_remaining < nzk) {
26697a48dd6fSHong Zhang         i = am - k + 1;                                    /* num of unfactored rows */
2670f91af8c7SBarry Smith         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
26719566063dSJacob Faibussowitsch         PetscCall(PetscFreeSpaceGet(i, &current_space));
26729566063dSJacob Faibussowitsch         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
26737a48dd6fSHong Zhang         reallocs++;
26747a48dd6fSHong Zhang       }
26757a48dd6fSHong Zhang 
26767a48dd6fSHong Zhang       /* copy data into free_space and free_space_lvl, then initialize lnk */
267728b400f6SJacob Faibussowitsch       PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
26789566063dSJacob Faibussowitsch       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
26797a48dd6fSHong Zhang 
26807a48dd6fSHong Zhang       /* add the k-th row into il and jl */
268139e3d630SHong Zhang       if (nzk > 1) {
26827a48dd6fSHong Zhang         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
26839371c9d4SSatish Balay         jl[k] = jl[i];
26849371c9d4SSatish Balay         jl[i] = k;
26857a48dd6fSHong Zhang         il[k] = ui[k] + 1;
26867a48dd6fSHong Zhang       }
26877a48dd6fSHong Zhang       uj_ptr[k]     = current_space->array;
26887a48dd6fSHong Zhang       uj_lvl_ptr[k] = current_space_lvl->array;
26897a48dd6fSHong Zhang 
26907a48dd6fSHong Zhang       current_space->array += nzk;
26917a48dd6fSHong Zhang       current_space->local_used += nzk;
26927a48dd6fSHong Zhang       current_space->local_remaining -= nzk;
26937a48dd6fSHong Zhang 
26947a48dd6fSHong Zhang       current_space_lvl->array += nzk;
26957a48dd6fSHong Zhang       current_space_lvl->local_used += nzk;
26967a48dd6fSHong Zhang       current_space_lvl->local_remaining -= nzk;
26977a48dd6fSHong Zhang 
26987a48dd6fSHong Zhang       ui[k + 1] = ui[k] + nzk;
26997a48dd6fSHong Zhang     }
27007a48dd6fSHong Zhang 
27016cf91177SBarry Smith   #if defined(PETSC_USE_INFO)
27027a48dd6fSHong Zhang     if (ai[am] != 0) {
270339e3d630SHong Zhang       PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
27049566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
27059566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
27069566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
27077a48dd6fSHong Zhang     } else {
27089566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "Empty matrix\n"));
27097a48dd6fSHong Zhang     }
271063ba0a88SBarry Smith   #endif
27117a48dd6fSHong Zhang 
27129566063dSJacob Faibussowitsch     PetscCall(ISRestoreIndices(perm, &rip));
27139566063dSJacob Faibussowitsch     PetscCall(ISRestoreIndices(iperm, &riip));
27149566063dSJacob Faibussowitsch     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
27159566063dSJacob Faibussowitsch     PetscCall(PetscFree(ajtmp));
27167a48dd6fSHong Zhang 
27177a48dd6fSHong Zhang     /* destroy list of free space and other temporary array(s) */
27189566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
27199566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
27209566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
27219566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
27227a48dd6fSHong Zhang 
272339e3d630SHong Zhang   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
272439e3d630SHong Zhang 
27257a48dd6fSHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
27267a48dd6fSHong Zhang 
2727f284f12aSHong Zhang   b               = (Mat_SeqSBAIJ *)fact->data;
27287a48dd6fSHong Zhang   b->singlemalloc = PETSC_FALSE;
27292205254eSKarl Rupp 
27309566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
27312205254eSKarl Rupp 
27327a48dd6fSHong Zhang   b->j         = uj;
27337a48dd6fSHong Zhang   b->i         = ui;
27345f44c854SHong Zhang   b->diag      = udiag;
27357f53bb6cSHong Zhang   b->free_diag = PETSC_TRUE;
2736f4259b30SLisandro Dalcin   b->ilen      = NULL;
2737f4259b30SLisandro Dalcin   b->imax      = NULL;
27387a48dd6fSHong Zhang   b->row       = perm;
2739b635d36bSHong Zhang   b->col       = perm;
27402205254eSKarl Rupp 
27419566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
27429566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
27432205254eSKarl Rupp 
2744b635d36bSHong Zhang   b->icol          = iperm;
27457a48dd6fSHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
27469566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
27477a48dd6fSHong Zhang   b->maxnz = b->nz = ui[am];
2748e6b907acSBarry Smith   b->free_a        = PETSC_TRUE;
2749e6b907acSBarry Smith   b->free_ij       = PETSC_TRUE;
27507a48dd6fSHong Zhang 
2751f284f12aSHong Zhang   fact->info.factor_mallocs   = reallocs;
2752f284f12aSHong Zhang   fact->info.fill_ratio_given = fill;
27537a48dd6fSHong Zhang   if (ai[am] != 0) {
2754f284f12aSHong Zhang     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
27557a48dd6fSHong Zhang   } else {
2756f284f12aSHong Zhang     fact->info.fill_ratio_needed = 0.0;
27577a48dd6fSHong Zhang   }
27580a3c351aSHong Zhang   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
27593ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2760a6175056SHong Zhang }
2761ff6a9541SJacob Faibussowitsch #endif
2762d5d45c9bSBarry Smith 
2763d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2764d71ae5a4SJacob Faibussowitsch {
27650be760fbSHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
27660be760fbSHong Zhang   Mat_SeqSBAIJ      *b;
27679186b105SHong Zhang   PetscBool          perm_identity, missing;
27680be760fbSHong Zhang   PetscReal          fill = info->fill;
27690be760fbSHong Zhang   const PetscInt    *rip, *riip;
27700be760fbSHong Zhang   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
27710be760fbSHong Zhang   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
27720be760fbSHong Zhang   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
27730298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
27740be760fbSHong Zhang   PetscBT            lnkbt;
27750be760fbSHong Zhang   IS                 iperm;
27760be760fbSHong Zhang 
27770be760fbSHong Zhang   PetscFunctionBegin;
277808401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
27799566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
278028b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
27819186b105SHong Zhang 
27820be760fbSHong Zhang   /* check whether perm is the identity mapping */
27839566063dSJacob Faibussowitsch   PetscCall(ISIdentity(perm, &perm_identity));
27849566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
27859566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iperm, &riip));
27869566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(perm, &rip));
27870be760fbSHong Zhang 
27880be760fbSHong Zhang   /* initialization */
27899566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &ui));
27909566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &udiag));
27910be760fbSHong Zhang   ui[0] = 0;
27920be760fbSHong Zhang 
27930be760fbSHong Zhang   /* jl: linked list for storing indices of the pivot rows
27940be760fbSHong Zhang      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
27959566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
27960be760fbSHong Zhang   for (i = 0; i < am; i++) {
27979371c9d4SSatish Balay     jl[i] = am;
27989371c9d4SSatish Balay     il[i] = 0;
27990be760fbSHong Zhang   }
28000be760fbSHong Zhang 
28010be760fbSHong Zhang   /* create and initialize a linked list for storing column indices of the active row k */
28020be760fbSHong Zhang   nlnk = am + 1;
28039566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
28040be760fbSHong Zhang 
280595b5ac22SHong Zhang   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
28069566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
28070be760fbSHong Zhang   current_space = free_space;
28080be760fbSHong Zhang 
28090be760fbSHong Zhang   for (k = 0; k < am; k++) { /* for each active row k */
28100be760fbSHong Zhang     /* initialize lnk by the column indices of row rip[k] of A */
28110be760fbSHong Zhang     nzk   = 0;
28120be760fbSHong Zhang     ncols = ai[rip[k] + 1] - ai[rip[k]];
281328b400f6SJacob Faibussowitsch     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
28140be760fbSHong Zhang     ncols_upper = 0;
28150be760fbSHong Zhang     for (j = 0; j < ncols; j++) {
28160be760fbSHong Zhang       i = riip[*(aj + ai[rip[k]] + j)];
28170be760fbSHong Zhang       if (i >= k) { /* only take upper triangular entry */
28180be760fbSHong Zhang         cols[ncols_upper] = i;
28190be760fbSHong Zhang         ncols_upper++;
28200be760fbSHong Zhang       }
28210be760fbSHong Zhang     }
28229566063dSJacob Faibussowitsch     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
28230be760fbSHong Zhang     nzk += nlnk;
28240be760fbSHong Zhang 
28250be760fbSHong Zhang     /* update lnk by computing fill-in for each pivot row to be merged in */
28260be760fbSHong Zhang     prow = jl[k]; /* 1st pivot row */
28270be760fbSHong Zhang 
28280be760fbSHong Zhang     while (prow < k) {
28290be760fbSHong Zhang       nextprow = jl[prow];
28300be760fbSHong Zhang       /* merge prow into k-th row */
28310be760fbSHong Zhang       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
28320be760fbSHong Zhang       jmax   = ui[prow + 1];
28330be760fbSHong Zhang       ncols  = jmax - jmin;
28340be760fbSHong Zhang       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
28359566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
28360be760fbSHong Zhang       nzk += nlnk;
28370be760fbSHong Zhang 
28380be760fbSHong Zhang       /* update il and jl for prow */
28390be760fbSHong Zhang       if (jmin < jmax) {
28400be760fbSHong Zhang         il[prow] = jmin;
28412205254eSKarl Rupp         j        = *uj_ptr;
28422205254eSKarl Rupp         jl[prow] = jl[j];
28432205254eSKarl Rupp         jl[j]    = prow;
28440be760fbSHong Zhang       }
28450be760fbSHong Zhang       prow = nextprow;
28460be760fbSHong Zhang     }
28470be760fbSHong Zhang 
28480be760fbSHong Zhang     /* if free space is not available, make more free space */
28490be760fbSHong Zhang     if (current_space->local_remaining < nzk) {
28500be760fbSHong Zhang       i = am - k + 1;                                    /* num of unfactored rows */
2851f91af8c7SBarry Smith       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
28529566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(i, &current_space));
28530be760fbSHong Zhang       reallocs++;
28540be760fbSHong Zhang     }
28550be760fbSHong Zhang 
28560be760fbSHong Zhang     /* copy data into free space, then initialize lnk */
28579566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
28580be760fbSHong Zhang 
28590be760fbSHong Zhang     /* add the k-th row into il and jl */
28607b056e98SHong Zhang     if (nzk > 1) {
28610be760fbSHong Zhang       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
28629371c9d4SSatish Balay       jl[k] = jl[i];
28639371c9d4SSatish Balay       jl[i] = k;
28640be760fbSHong Zhang       il[k] = ui[k] + 1;
28650be760fbSHong Zhang     }
28660be760fbSHong Zhang     ui_ptr[k] = current_space->array;
28672205254eSKarl Rupp 
28680be760fbSHong Zhang     current_space->array += nzk;
28690be760fbSHong Zhang     current_space->local_used += nzk;
28700be760fbSHong Zhang     current_space->local_remaining -= nzk;
28710be760fbSHong Zhang 
28720be760fbSHong Zhang     ui[k + 1] = ui[k] + nzk;
28730be760fbSHong Zhang   }
28740be760fbSHong Zhang 
28759566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(perm, &rip));
28769566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iperm, &riip));
28779566063dSJacob Faibussowitsch   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
28780be760fbSHong Zhang 
28799263d837SHong Zhang   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
28809566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
28819566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
28829566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
28830be760fbSHong Zhang 
28840be760fbSHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
28850be760fbSHong Zhang 
2886f284f12aSHong Zhang   b               = (Mat_SeqSBAIJ *)fact->data;
28870be760fbSHong Zhang   b->singlemalloc = PETSC_FALSE;
28880be760fbSHong Zhang   b->free_a       = PETSC_TRUE;
28890be760fbSHong Zhang   b->free_ij      = PETSC_TRUE;
28902205254eSKarl Rupp 
28919566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
28922205254eSKarl Rupp 
28930be760fbSHong Zhang   b->j         = uj;
28940be760fbSHong Zhang   b->i         = ui;
28950be760fbSHong Zhang   b->diag      = udiag;
28960be760fbSHong Zhang   b->free_diag = PETSC_TRUE;
2897f4259b30SLisandro Dalcin   b->ilen      = NULL;
2898f4259b30SLisandro Dalcin   b->imax      = NULL;
28990be760fbSHong Zhang   b->row       = perm;
29000be760fbSHong Zhang   b->col       = perm;
290126fbe8dcSKarl Rupp 
29029566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
29039566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
29042205254eSKarl Rupp 
29050be760fbSHong Zhang   b->icol          = iperm;
29060be760fbSHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
290726fbe8dcSKarl Rupp 
29089566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
29092205254eSKarl Rupp 
29100be760fbSHong Zhang   b->maxnz = b->nz = ui[am];
29110be760fbSHong Zhang 
2912f284f12aSHong Zhang   fact->info.factor_mallocs   = reallocs;
2913f284f12aSHong Zhang   fact->info.fill_ratio_given = fill;
29140be760fbSHong Zhang   if (ai[am] != 0) {
29156a69fef8SHong Zhang     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
291695b5ac22SHong Zhang     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
29170be760fbSHong Zhang   } else {
2918f284f12aSHong Zhang     fact->info.fill_ratio_needed = 0.0;
29190be760fbSHong Zhang   }
29209263d837SHong Zhang #if defined(PETSC_USE_INFO)
29219263d837SHong Zhang   if (ai[am] != 0) {
29229263d837SHong Zhang     PetscReal af = fact->info.fill_ratio_needed;
29239566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
29249566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
29259566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
29269263d837SHong Zhang   } else {
29279566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
29289263d837SHong Zhang   }
29299263d837SHong Zhang #endif
293035233d99SShri Abhyankar   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
29313ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
29320be760fbSHong Zhang }
29330be760fbSHong Zhang 
2934ff6a9541SJacob Faibussowitsch #if 0
2935ff6a9541SJacob Faibussowitsch // unused
2936ff6a9541SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2937d71ae5a4SJacob Faibussowitsch {
2938f76d2b81SHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
293910c27e3dSHong Zhang   Mat_SeqSBAIJ      *b;
29409186b105SHong Zhang   PetscBool          perm_identity, missing;
294110c27e3dSHong Zhang   PetscReal          fill = info->fill;
29425d0c19d7SBarry Smith   const PetscInt    *rip, *riip;
29435d0c19d7SBarry Smith   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
294410c27e3dSHong Zhang   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
29452ed133dbSHong Zhang   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
29460298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
294710c27e3dSHong Zhang   PetscBT            lnkbt;
29482ed133dbSHong Zhang   IS                 iperm;
2949f76d2b81SHong Zhang 
2950f76d2b81SHong Zhang   PetscFunctionBegin;
295108401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
29529566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
295328b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
29549186b105SHong Zhang 
29552ed133dbSHong Zhang   /* check whether perm is the identity mapping */
29569566063dSJacob Faibussowitsch   PetscCall(ISIdentity(perm, &perm_identity));
29579566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
29589566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iperm, &riip));
29599566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(perm, &rip));
296010c27e3dSHong Zhang 
296110c27e3dSHong Zhang   /* initialization */
29629566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &ui));
296310c27e3dSHong Zhang   ui[0] = 0;
296410c27e3dSHong Zhang 
296510c27e3dSHong Zhang   /* jl: linked list for storing indices of the pivot rows
29661393bc97SHong Zhang      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
29679566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
29681393bc97SHong Zhang   for (i = 0; i < am; i++) {
29699371c9d4SSatish Balay     jl[i] = am;
29709371c9d4SSatish Balay     il[i] = 0;
2971f76d2b81SHong Zhang   }
297210c27e3dSHong Zhang 
297310c27e3dSHong Zhang   /* create and initialize a linked list for storing column indices of the active row k */
29741393bc97SHong Zhang   nlnk = am + 1;
29759566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
297610c27e3dSHong Zhang 
29771393bc97SHong Zhang   /* initial FreeSpace size is fill*(ai[am]+1) */
29789566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
297910c27e3dSHong Zhang   current_space = free_space;
298010c27e3dSHong Zhang 
29811393bc97SHong Zhang   for (k = 0; k < am; k++) { /* for each active row k */
298210c27e3dSHong Zhang     /* initialize lnk by the column indices of row rip[k] of A */
298310c27e3dSHong Zhang     nzk   = 0;
29842ed133dbSHong Zhang     ncols = ai[rip[k] + 1] - ai[rip[k]];
298594bad497SJacob Faibussowitsch     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
29862ed133dbSHong Zhang     ncols_upper = 0;
29872ed133dbSHong Zhang     for (j = 0; j < ncols; j++) {
29889bfd6278SHong Zhang       i = riip[*(aj + ai[rip[k]] + j)];
29892ed133dbSHong Zhang       if (i >= k) { /* only take upper triangular entry */
29902ed133dbSHong Zhang         cols[ncols_upper] = i;
29912ed133dbSHong Zhang         ncols_upper++;
29922ed133dbSHong Zhang       }
29932ed133dbSHong Zhang     }
29949566063dSJacob Faibussowitsch     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
299510c27e3dSHong Zhang     nzk += nlnk;
299610c27e3dSHong Zhang 
299710c27e3dSHong Zhang     /* update lnk by computing fill-in for each pivot row to be merged in */
299810c27e3dSHong Zhang     prow = jl[k]; /* 1st pivot row */
299910c27e3dSHong Zhang 
300010c27e3dSHong Zhang     while (prow < k) {
300110c27e3dSHong Zhang       nextprow = jl[prow];
300210c27e3dSHong Zhang       /* merge prow into k-th row */
30031393bc97SHong Zhang       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
300410c27e3dSHong Zhang       jmax   = ui[prow + 1];
300510c27e3dSHong Zhang       ncols  = jmax - jmin;
30061393bc97SHong Zhang       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
30079566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
300810c27e3dSHong Zhang       nzk += nlnk;
300910c27e3dSHong Zhang 
301010c27e3dSHong Zhang       /* update il and jl for prow */
301110c27e3dSHong Zhang       if (jmin < jmax) {
301210c27e3dSHong Zhang         il[prow] = jmin;
30139371c9d4SSatish Balay         j        = *uj_ptr;
30149371c9d4SSatish Balay         jl[prow] = jl[j];
30159371c9d4SSatish Balay         jl[j]    = prow;
301610c27e3dSHong Zhang       }
301710c27e3dSHong Zhang       prow = nextprow;
301810c27e3dSHong Zhang     }
301910c27e3dSHong Zhang 
302010c27e3dSHong Zhang     /* if free space is not available, make more free space */
302110c27e3dSHong Zhang     if (current_space->local_remaining < nzk) {
30221393bc97SHong Zhang       i = am - k + 1;                     /* num of unfactored rows */
302310c27e3dSHong Zhang       i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
30249566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(i, &current_space));
302510c27e3dSHong Zhang       reallocs++;
302610c27e3dSHong Zhang     }
302710c27e3dSHong Zhang 
302810c27e3dSHong Zhang     /* copy data into free space, then initialize lnk */
30299566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
303010c27e3dSHong Zhang 
303110c27e3dSHong Zhang     /* add the k-th row into il and jl */
303210c27e3dSHong Zhang     if (nzk - 1 > 0) {
30331393bc97SHong Zhang       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
30349371c9d4SSatish Balay       jl[k] = jl[i];
30359371c9d4SSatish Balay       jl[i] = k;
303610c27e3dSHong Zhang       il[k] = ui[k] + 1;
303710c27e3dSHong Zhang     }
30382ed133dbSHong Zhang     ui_ptr[k] = current_space->array;
30392205254eSKarl Rupp 
304010c27e3dSHong Zhang     current_space->array += nzk;
304110c27e3dSHong Zhang     current_space->local_used += nzk;
304210c27e3dSHong Zhang     current_space->local_remaining -= nzk;
304310c27e3dSHong Zhang 
304410c27e3dSHong Zhang     ui[k + 1] = ui[k] + nzk;
304510c27e3dSHong Zhang   }
304610c27e3dSHong Zhang 
30476cf91177SBarry Smith   #if defined(PETSC_USE_INFO)
30481393bc97SHong Zhang   if (ai[am] != 0) {
30491393bc97SHong Zhang     PetscReal af = (PetscReal)(ui[am]) / ((PetscReal)ai[am]);
30509566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
30519566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
30529566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
305310c27e3dSHong Zhang   } else {
30549566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
305510c27e3dSHong Zhang   }
305663ba0a88SBarry Smith   #endif
305710c27e3dSHong Zhang 
30589566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(perm, &rip));
30599566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iperm, &riip));
30609566063dSJacob Faibussowitsch   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
306110c27e3dSHong Zhang 
306210c27e3dSHong Zhang   /* destroy list of free space and other temporary array(s) */
30639566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
30649566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
30659566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
306610c27e3dSHong Zhang 
306710c27e3dSHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
306810c27e3dSHong Zhang 
3069f284f12aSHong Zhang   b               = (Mat_SeqSBAIJ *)fact->data;
307010c27e3dSHong Zhang   b->singlemalloc = PETSC_FALSE;
3071e6b907acSBarry Smith   b->free_a       = PETSC_TRUE;
3072e6b907acSBarry Smith   b->free_ij      = PETSC_TRUE;
30732205254eSKarl Rupp 
30749566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
30752205254eSKarl Rupp 
307610c27e3dSHong Zhang   b->j    = uj;
307710c27e3dSHong Zhang   b->i    = ui;
3078f4259b30SLisandro Dalcin   b->diag = NULL;
3079f4259b30SLisandro Dalcin   b->ilen = NULL;
3080f4259b30SLisandro Dalcin   b->imax = NULL;
308110c27e3dSHong Zhang   b->row  = perm;
30829bfd6278SHong Zhang   b->col  = perm;
30832205254eSKarl Rupp 
30849566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
30859566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
30862205254eSKarl Rupp 
30879bfd6278SHong Zhang   b->icol          = iperm;
308810c27e3dSHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
30892205254eSKarl Rupp 
30909566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
30911393bc97SHong Zhang   b->maxnz = b->nz = ui[am];
309210c27e3dSHong Zhang 
3093f284f12aSHong Zhang   fact->info.factor_mallocs   = reallocs;
3094f284f12aSHong Zhang   fact->info.fill_ratio_given = fill;
30951393bc97SHong Zhang   if (ai[am] != 0) {
3096f284f12aSHong Zhang     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
309710c27e3dSHong Zhang   } else {
3098f284f12aSHong Zhang     fact->info.fill_ratio_needed = 0.0;
309910c27e3dSHong Zhang   }
31000a3c351aSHong Zhang   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
31013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3102f76d2b81SHong Zhang }
3103ff6a9541SJacob Faibussowitsch #endif
3104599ef60dSHong Zhang 
3105d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
3106d71ae5a4SJacob Faibussowitsch {
3107813a1f2bSShri Abhyankar   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
3108813a1f2bSShri Abhyankar   PetscInt           n  = A->rmap->n;
3109813a1f2bSShri Abhyankar   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3110813a1f2bSShri Abhyankar   PetscScalar       *x, sum;
3111813a1f2bSShri Abhyankar   const PetscScalar *b;
3112813a1f2bSShri Abhyankar   const MatScalar   *aa = a->a, *v;
3113813a1f2bSShri Abhyankar   PetscInt           i, nz;
3114813a1f2bSShri Abhyankar 
3115813a1f2bSShri Abhyankar   PetscFunctionBegin;
31163ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3117813a1f2bSShri Abhyankar 
31189566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
31199566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
3120813a1f2bSShri Abhyankar 
3121813a1f2bSShri Abhyankar   /* forward solve the lower triangular */
3122813a1f2bSShri Abhyankar   x[0] = b[0];
3123813a1f2bSShri Abhyankar   v    = aa;
3124813a1f2bSShri Abhyankar   vi   = aj;
3125813a1f2bSShri Abhyankar   for (i = 1; i < n; i++) {
3126813a1f2bSShri Abhyankar     nz  = ai[i + 1] - ai[i];
3127813a1f2bSShri Abhyankar     sum = b[i];
3128813a1f2bSShri Abhyankar     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3129813a1f2bSShri Abhyankar     v += nz;
3130813a1f2bSShri Abhyankar     vi += nz;
3131813a1f2bSShri Abhyankar     x[i] = sum;
3132813a1f2bSShri Abhyankar   }
3133813a1f2bSShri Abhyankar 
3134813a1f2bSShri Abhyankar   /* backward solve the upper triangular */
313562fbd6afSShri Abhyankar   for (i = n - 1; i >= 0; i--) {
31363c0119dfSShri Abhyankar     v   = aa + adiag[i + 1] + 1;
31373c0119dfSShri Abhyankar     vi  = aj + adiag[i + 1] + 1;
3138813a1f2bSShri Abhyankar     nz  = adiag[i] - adiag[i + 1] - 1;
3139813a1f2bSShri Abhyankar     sum = x[i];
3140813a1f2bSShri Abhyankar     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
31413c0119dfSShri Abhyankar     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3142813a1f2bSShri Abhyankar   }
3143813a1f2bSShri Abhyankar 
31449566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
31459566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
31469566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
31473ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3148813a1f2bSShri Abhyankar }
3149813a1f2bSShri Abhyankar 
3150d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
3151d71ae5a4SJacob Faibussowitsch {
3152f268cba8SShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
3153f268cba8SShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
3154f268cba8SShri Abhyankar   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3155f268cba8SShri Abhyankar   const PetscInt    *rout, *cout, *r, *c;
3156f268cba8SShri Abhyankar   PetscScalar       *x, *tmp, sum;
3157f268cba8SShri Abhyankar   const PetscScalar *b;
3158f268cba8SShri Abhyankar   const MatScalar   *aa = a->a, *v;
3159f268cba8SShri Abhyankar 
3160f268cba8SShri Abhyankar   PetscFunctionBegin;
31613ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3162f268cba8SShri Abhyankar 
31639566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
31649566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
3165f268cba8SShri Abhyankar   tmp = a->solve_work;
3166f268cba8SShri Abhyankar 
31679371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
31689371c9d4SSatish Balay   r = rout;
31699371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
31709371c9d4SSatish Balay   c = cout;
3171f268cba8SShri Abhyankar 
3172f268cba8SShri Abhyankar   /* forward solve the lower triangular */
3173f268cba8SShri Abhyankar   tmp[0] = b[r[0]];
3174f268cba8SShri Abhyankar   v      = aa;
3175f268cba8SShri Abhyankar   vi     = aj;
3176f268cba8SShri Abhyankar   for (i = 1; i < n; i++) {
3177f268cba8SShri Abhyankar     nz  = ai[i + 1] - ai[i];
3178f268cba8SShri Abhyankar     sum = b[r[i]];
3179f268cba8SShri Abhyankar     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3180f268cba8SShri Abhyankar     tmp[i] = sum;
31819371c9d4SSatish Balay     v += nz;
31829371c9d4SSatish Balay     vi += nz;
3183f268cba8SShri Abhyankar   }
3184f268cba8SShri Abhyankar 
3185f268cba8SShri Abhyankar   /* backward solve the upper triangular */
3186f268cba8SShri Abhyankar   for (i = n - 1; i >= 0; i--) {
31873c0119dfSShri Abhyankar     v   = aa + adiag[i + 1] + 1;
31883c0119dfSShri Abhyankar     vi  = aj + adiag[i + 1] + 1;
3189f268cba8SShri Abhyankar     nz  = adiag[i] - adiag[i + 1] - 1;
3190f268cba8SShri Abhyankar     sum = tmp[i];
3191f268cba8SShri Abhyankar     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3192f268cba8SShri Abhyankar     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3193f268cba8SShri Abhyankar   }
3194f268cba8SShri Abhyankar 
31959566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
31969566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
31979566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
31989566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
31999566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
32003ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3201f268cba8SShri Abhyankar }
3202f268cba8SShri Abhyankar 
3203ff6a9541SJacob Faibussowitsch #if 0
3204ff6a9541SJacob Faibussowitsch // unused
3205fe97e370SBarry Smith /*
32066aad120cSJose E. Roman     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3207fe97e370SBarry Smith */
3208ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact)
3209d71ae5a4SJacob Faibussowitsch {
32101d098868SHong Zhang   Mat             B = *fact;
3211599ef60dSHong Zhang   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b;
3212599ef60dSHong Zhang   IS              isicol;
32131d098868SHong Zhang   const PetscInt *r, *ic;
32141d098868SHong Zhang   PetscInt        i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3215dc3a2fd3SHong Zhang   PetscInt       *bi, *bj, *bdiag, *bdiag_rev;
3216f61a948aSLisandro Dalcin   PetscInt        row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
32171d098868SHong Zhang   PetscInt        nlnk, *lnk;
32181d098868SHong Zhang   PetscBT         lnkbt;
3219ace3abfcSBarry Smith   PetscBool       row_identity, icol_identity;
32208aee2decSHong Zhang   MatScalar      *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
32211d098868SHong Zhang   const PetscInt *ics;
32221d098868SHong Zhang   PetscInt        j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3223a2ea699eSBarry Smith   PetscReal       dt = info->dt, shift = info->shiftamount;
3224f61a948aSLisandro Dalcin   PetscInt        dtcount = (PetscInt)info->dtcount, nnz_max;
3225ace3abfcSBarry Smith   PetscBool       missing;
3226599ef60dSHong Zhang 
3227599ef60dSHong Zhang   PetscFunctionBegin;
322813bcc0bdSJacob Faibussowitsch   if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005;
3229f61a948aSLisandro Dalcin   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);
3230f61a948aSLisandro Dalcin 
32312ef1f0ffSBarry Smith   /* symbolic factorization, can be reused */
32329566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
323328b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
32341d098868SHong Zhang   adiag = a->diag;
3235599ef60dSHong Zhang 
32369566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
3237599ef60dSHong Zhang 
3238599ef60dSHong Zhang   /* bdiag is location of diagonal in factor */
32399566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));     /* becomes b->diag */
32409566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */
3241599ef60dSHong Zhang 
32421d098868SHong Zhang   /* allocate row pointers bi */
32439566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * n + 2, &bi));
32441d098868SHong Zhang 
32451d098868SHong Zhang   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3246393d3a68SHong Zhang   if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
32471d098868SHong Zhang   nnz_max = ai[n] + 2 * n * dtcount + 2;
32488fc3a347SHong Zhang 
32499566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nnz_max + 1, &bj));
32509566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nnz_max + 1, &ba));
3251599ef60dSHong Zhang 
3252599ef60dSHong Zhang   /* put together the new matrix */
32539566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3254f284f12aSHong Zhang   b = (Mat_SeqAIJ *)B->data;
32552205254eSKarl Rupp 
3256599ef60dSHong Zhang   b->free_a       = PETSC_TRUE;
3257599ef60dSHong Zhang   b->free_ij      = PETSC_TRUE;
3258599ef60dSHong Zhang   b->singlemalloc = PETSC_FALSE;
32592205254eSKarl Rupp 
3260599ef60dSHong Zhang   b->a    = ba;
3261599ef60dSHong Zhang   b->j    = bj;
3262599ef60dSHong Zhang   b->i    = bi;
3263599ef60dSHong Zhang   b->diag = bdiag;
3264f4259b30SLisandro Dalcin   b->ilen = NULL;
3265f4259b30SLisandro Dalcin   b->imax = NULL;
3266599ef60dSHong Zhang   b->row  = isrow;
3267599ef60dSHong Zhang   b->col  = iscol;
32689566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
32699566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
3270599ef60dSHong Zhang   b->icol = isicol;
3271599ef60dSHong Zhang 
32729566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
32731d098868SHong Zhang   b->maxnz = nnz_max;
3274599ef60dSHong Zhang 
3275d5f3da31SBarry Smith   B->factortype            = MAT_FACTOR_ILUDT;
3276f284f12aSHong Zhang   B->info.factor_mallocs   = 0;
3277f284f12aSHong Zhang   B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
32782ef1f0ffSBarry Smith   /*  end of symbolic factorization */
3279599ef60dSHong Zhang 
32809566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
32819566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
3282599ef60dSHong Zhang   ics = ic;
3283599ef60dSHong Zhang 
3284599ef60dSHong Zhang   /* linked list for storing column indices of the active row */
3285599ef60dSHong Zhang   nlnk = n + 1;
32869566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
32871d098868SHong Zhang 
32881d098868SHong Zhang   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
32899566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n, &im, n, &jtmp));
32901d098868SHong Zhang   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
32919566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp));
32929566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(rtmp, n));
3293599ef60dSHong Zhang 
3294599ef60dSHong Zhang   bi[0]         = 0;
32958fc3a347SHong Zhang   bdiag[0]      = nnz_max - 1; /* location of diag[0] in factor B */
3296dc3a2fd3SHong Zhang   bdiag_rev[n]  = bdiag[0];
32978fc3a347SHong Zhang   bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3298599ef60dSHong Zhang   for (i = 0; i < n; i++) {
3299599ef60dSHong Zhang     /* copy initial fill into linked list */
33008369b28dSHong Zhang     nzi = ai[r[i] + 1] - ai[r[i]];
330194bad497SJacob Faibussowitsch     PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
33028369b28dSHong Zhang     nzi_al = adiag[r[i]] - ai[r[i]];
33038369b28dSHong Zhang     nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3304599ef60dSHong Zhang     ajtmp  = aj + ai[r[i]];
33059566063dSJacob Faibussowitsch     PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt));
3306599ef60dSHong Zhang 
3307599ef60dSHong Zhang     /* load in initial (unfactored row) */
33081d098868SHong Zhang     aatmp = a->a + ai[r[i]];
3309ad540459SPierre Jolivet     for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++;
3310599ef60dSHong Zhang 
3311599ef60dSHong Zhang     /* add pivot rows into linked list */
3312599ef60dSHong Zhang     row = lnk[n];
3313599ef60dSHong Zhang     while (row < i) {
33141d098868SHong Zhang       nzi_bl = bi[row + 1] - bi[row] + 1;
33151d098868SHong Zhang       bjtmp  = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
33169566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im));
3317599ef60dSHong Zhang       nzi += nlnk;
3318599ef60dSHong Zhang       row = lnk[row];
3319599ef60dSHong Zhang     }
3320599ef60dSHong Zhang 
33218369b28dSHong Zhang     /* copy data from lnk into jtmp, then initialize lnk */
33229566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt));
3323599ef60dSHong Zhang 
3324599ef60dSHong Zhang     /* numerical factorization */
33258369b28dSHong Zhang     bjtmp = jtmp;
3326599ef60dSHong Zhang     row   = *bjtmp++; /* 1st pivot row */
3327599ef60dSHong Zhang     while (row < i) {
3328599ef60dSHong Zhang       pc         = rtmp + row;
33293c2a7987SHong Zhang       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
33303c2a7987SHong Zhang       multiplier = (*pc) * (*pv);
3331599ef60dSHong Zhang       *pc        = multiplier;
33323c2a7987SHong Zhang       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
33331d098868SHong Zhang         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
33341d098868SHong Zhang         pv = ba + bdiag[row + 1] + 1;
33351d098868SHong Zhang         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
33361d098868SHong Zhang         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
33379566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3338599ef60dSHong Zhang       }
3339599ef60dSHong Zhang       row = *bjtmp++;
3340599ef60dSHong Zhang     }
3341599ef60dSHong Zhang 
33428369b28dSHong Zhang     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
33438aee2decSHong Zhang     diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
33449371c9d4SSatish Balay     nzi_bl   = 0;
33459371c9d4SSatish Balay     j        = 0;
33468369b28dSHong Zhang     while (jtmp[j] < i) { /* Note: jtmp is sorted */
33479371c9d4SSatish Balay       vtmp[j]       = rtmp[jtmp[j]];
33489371c9d4SSatish Balay       rtmp[jtmp[j]] = 0.0;
33499371c9d4SSatish Balay       nzi_bl++;
33509371c9d4SSatish Balay       j++;
33518369b28dSHong Zhang     }
33528369b28dSHong Zhang     nzi_bu = nzi - nzi_bl - 1;
33538369b28dSHong Zhang     while (j < nzi) {
33549371c9d4SSatish Balay       vtmp[j]       = rtmp[jtmp[j]];
33559371c9d4SSatish Balay       rtmp[jtmp[j]] = 0.0;
33568369b28dSHong Zhang       j++;
33578369b28dSHong Zhang     }
33588369b28dSHong Zhang 
3359599ef60dSHong Zhang     bjtmp = bj + bi[i];
3360599ef60dSHong Zhang     batmp = ba + bi[i];
33618369b28dSHong Zhang     /* apply level dropping rule to L part */
33628369b28dSHong Zhang     ncut = nzi_al + dtcount;
33638369b28dSHong Zhang     if (ncut < nzi_bl) {
33649566063dSJacob Faibussowitsch       PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp));
33659566063dSJacob Faibussowitsch       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp));
3366599ef60dSHong Zhang     } else {
33678369b28dSHong Zhang       ncut = nzi_bl;
33688369b28dSHong Zhang     }
33698369b28dSHong Zhang     for (j = 0; j < ncut; j++) {
33708369b28dSHong Zhang       bjtmp[j] = jtmp[j];
33718369b28dSHong Zhang       batmp[j] = vtmp[j];
33728369b28dSHong Zhang     }
33731d098868SHong Zhang     bi[i + 1] = bi[i] + ncut;
33748369b28dSHong Zhang     nzi       = ncut + 1;
33758369b28dSHong Zhang 
33768369b28dSHong Zhang     /* apply level dropping rule to U part */
33778369b28dSHong Zhang     ncut = nzi_au + dtcount;
33788369b28dSHong Zhang     if (ncut < nzi_bu) {
33799566063dSJacob Faibussowitsch       PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1));
33809566063dSJacob Faibussowitsch       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1));
33818369b28dSHong Zhang     } else {
33828369b28dSHong Zhang       ncut = nzi_bu;
33838369b28dSHong Zhang     }
33848369b28dSHong Zhang     nzi += ncut;
33851d098868SHong Zhang 
33861d098868SHong Zhang     /* mark bdiagonal */
33871d098868SHong Zhang     bdiag[i + 1]         = bdiag[i] - (ncut + 1);
3388dc3a2fd3SHong Zhang     bdiag_rev[n - i - 1] = bdiag[i + 1];
33898fc3a347SHong Zhang     bi[2 * n - i]        = bi[2 * n - i + 1] - (ncut + 1);
33901d098868SHong Zhang     bjtmp                = bj + bdiag[i];
33911d098868SHong Zhang     batmp                = ba + bdiag[i];
33921d098868SHong Zhang     *bjtmp               = i;
33938aee2decSHong Zhang     *batmp               = diag_tmp; /* rtmp[i]; */
3394ad540459SPierre Jolivet     if (*batmp == 0.0) *batmp = dt + shift;
3395a5b23f4aSJose E. Roman     *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */
33961d098868SHong Zhang 
33971d098868SHong Zhang     bjtmp = bj + bdiag[i + 1] + 1;
33981d098868SHong Zhang     batmp = ba + bdiag[i + 1] + 1;
33998369b28dSHong Zhang     for (k = 0; k < ncut; k++) {
34001d098868SHong Zhang       bjtmp[k] = jtmp[nzi_bl + 1 + k];
34011d098868SHong Zhang       batmp[k] = vtmp[nzi_bl + 1 + k];
3402599ef60dSHong Zhang     }
3403599ef60dSHong Zhang 
3404599ef60dSHong Zhang     im[i] = nzi; /* used by PetscLLAddSortedLU() */
34058369b28dSHong Zhang   }              /* for (i=0; i<n; i++) */
340663a3b9bcSJacob Faibussowitsch   PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]);
3407599ef60dSHong Zhang 
34089566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
34099566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
3410599ef60dSHong Zhang 
34119566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
34129566063dSJacob Faibussowitsch   PetscCall(PetscFree2(im, jtmp));
34139566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rtmp, vtmp));
34149566063dSJacob Faibussowitsch   PetscCall(PetscFree(bdiag_rev));
3415599ef60dSHong Zhang 
34169566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(B->cmap->n));
34171d098868SHong Zhang   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3418599ef60dSHong Zhang 
34199566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
34209566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isicol, &icol_identity));
3421599ef60dSHong Zhang   if (row_identity && icol_identity) {
342235233d99SShri Abhyankar     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3423599ef60dSHong Zhang   } else {
342435233d99SShri Abhyankar     B->ops->solve = MatSolve_SeqAIJ;
3425599ef60dSHong Zhang   }
3426599ef60dSHong Zhang 
3427f4259b30SLisandro Dalcin   B->ops->solveadd          = NULL;
3428f4259b30SLisandro Dalcin   B->ops->solvetranspose    = NULL;
3429f4259b30SLisandro Dalcin   B->ops->solvetransposeadd = NULL;
3430f4259b30SLisandro Dalcin   B->ops->matsolve          = NULL;
3431599ef60dSHong Zhang   B->assembled              = PETSC_TRUE;
3432599ef60dSHong Zhang   B->preallocated           = PETSC_TRUE;
34333ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3434599ef60dSHong Zhang }
3435599ef60dSHong Zhang 
3436da81f932SPierre Jolivet /* a wrapper of MatILUDTFactor_SeqAIJ() */
3437fe97e370SBarry Smith /*
34386aad120cSJose E. Roman     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3439fe97e370SBarry Smith */
3440fe97e370SBarry Smith 
3441ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
3442d71ae5a4SJacob Faibussowitsch {
34433c2a7987SHong Zhang   PetscFunctionBegin;
34449566063dSJacob Faibussowitsch   PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact));
34453ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
34463c2a7987SHong Zhang }
34473c2a7987SHong Zhang 
34483c2a7987SHong Zhang /*
34493c2a7987SHong Zhang    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
34503c2a7987SHong Zhang    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
34513c2a7987SHong Zhang */
3452fe97e370SBarry Smith /*
34536aad120cSJose E. Roman     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3454fe97e370SBarry Smith */
3455fe97e370SBarry Smith 
3456ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info)
3457d71ae5a4SJacob Faibussowitsch {
34583c2a7987SHong Zhang   Mat             C = fact;
34593c2a7987SHong Zhang   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
34603c2a7987SHong Zhang   IS              isrow = b->row, isicol = b->icol;
34613c2a7987SHong Zhang   const PetscInt *r, *ic, *ics;
3462b78a477dSHong Zhang   PetscInt        i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3463b78a477dSHong Zhang   PetscInt       *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
34643c2a7987SHong Zhang   MatScalar      *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3465182b8fbaSHong Zhang   PetscReal       dt = info->dt, shift = info->shiftamount;
3466ace3abfcSBarry Smith   PetscBool       row_identity, col_identity;
34673c2a7987SHong Zhang 
34683c2a7987SHong Zhang   PetscFunctionBegin;
34699566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
34709566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
34719566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &rtmp));
34723c2a7987SHong Zhang   ics = ic;
34733c2a7987SHong Zhang 
34743c2a7987SHong Zhang   for (i = 0; i < n; i++) {
3475b78a477dSHong Zhang     /* initialize rtmp array */
3476*bbea24aaSStefano Zampini     nzl   = bi[i + 1] - bi[i]; /* num of nonzeros in L(i,:) */
34773c2a7987SHong Zhang     bjtmp = bj + bi[i];
3478b78a477dSHong Zhang     for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3479b78a477dSHong Zhang     rtmp[i] = 0.0;
3480*bbea24aaSStefano Zampini     nzu     = bdiag[i] - bdiag[i + 1]; /* num of nonzeros in U(i,:) */
3481b78a477dSHong Zhang     bjtmp   = bj + bdiag[i + 1] + 1;
3482b78a477dSHong Zhang     for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;
34833c2a7987SHong Zhang 
3484b78a477dSHong Zhang     /* load in initial unfactored row of A */
34853c2a7987SHong Zhang     nz    = ai[r[i] + 1] - ai[r[i]];
34863c2a7987SHong Zhang     ajtmp = aj + ai[r[i]];
34873c2a7987SHong Zhang     v     = aa + ai[r[i]];
3488ad540459SPierre Jolivet     for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j];
34893c2a7987SHong Zhang 
3490b78a477dSHong Zhang     /* numerical factorization */
3491b78a477dSHong Zhang     bjtmp = bj + bi[i];        /* point to 1st entry of L(i,:) */
3492b78a477dSHong Zhang     nzl   = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3493b78a477dSHong Zhang     k     = 0;
3494b78a477dSHong Zhang     while (k < nzl) {
34953c2a7987SHong Zhang       row        = *bjtmp++;
34963c2a7987SHong Zhang       pc         = rtmp + row;
3497b78a477dSHong Zhang       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3498b78a477dSHong Zhang       multiplier = (*pc) * (*pv);
34993c2a7987SHong Zhang       *pc        = multiplier;
3500b78a477dSHong Zhang       if (PetscAbsScalar(multiplier) > dt) {
3501b78a477dSHong Zhang         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3502b78a477dSHong Zhang         pv = b->a + bdiag[row + 1] + 1;
3503b78a477dSHong Zhang         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3504b78a477dSHong Zhang         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
35059566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(1 + 2.0 * nz));
35063c2a7987SHong Zhang       }
3507b78a477dSHong Zhang       k++;
35083c2a7987SHong Zhang     }
35093c2a7987SHong Zhang 
3510b78a477dSHong Zhang     /* finished row so stick it into b->a */
3511b78a477dSHong Zhang     /* L-part */
3512b78a477dSHong Zhang     pv  = b->a + bi[i];
3513b78a477dSHong Zhang     pj  = bj + bi[i];
3514b78a477dSHong Zhang     nzl = bi[i + 1] - bi[i];
3515ad540459SPierre Jolivet     for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]];
3516b78a477dSHong Zhang 
3517a5b23f4aSJose E. Roman     /* diagonal: invert diagonal entries for simpler triangular solves */
3518b78a477dSHong Zhang     if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3519b78a477dSHong Zhang     b->a[bdiag[i]] = 1.0 / rtmp[i];
3520b78a477dSHong Zhang 
3521b78a477dSHong Zhang     /* U-part */
3522b78a477dSHong Zhang     pv  = b->a + bdiag[i + 1] + 1;
3523b78a477dSHong Zhang     pj  = bj + bdiag[i + 1] + 1;
3524b78a477dSHong Zhang     nzu = bdiag[i] - bdiag[i + 1] - 1;
3525ad540459SPierre Jolivet     for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]];
3526b78a477dSHong Zhang   }
3527b78a477dSHong Zhang 
35289566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
35299566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
35309566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
3531b78a477dSHong Zhang 
35329566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
35339566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isicol, &col_identity));
35343c2a7987SHong Zhang   if (row_identity && col_identity) {
353535233d99SShri Abhyankar     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
35363c2a7987SHong Zhang   } else {
353735233d99SShri Abhyankar     C->ops->solve = MatSolve_SeqAIJ;
35383c2a7987SHong Zhang   }
3539f4259b30SLisandro Dalcin   C->ops->solveadd          = NULL;
3540f4259b30SLisandro Dalcin   C->ops->solvetranspose    = NULL;
3541f4259b30SLisandro Dalcin   C->ops->solvetransposeadd = NULL;
3542f4259b30SLisandro Dalcin   C->ops->matsolve          = NULL;
35433c2a7987SHong Zhang   C->assembled              = PETSC_TRUE;
35443c2a7987SHong Zhang   C->preallocated           = PETSC_TRUE;
35452205254eSKarl Rupp 
35469566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->cmap->n));
35473ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
35483c2a7987SHong Zhang }
3549ff6a9541SJacob Faibussowitsch #endif
3550