xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 84648c2d64fd70938d2f71c337cda3739c77ab03)
1c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>
2c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h>
3c6db04a5SJed Brown #include <petscbt.h>
4c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
53b2fbd54SBarry Smith 
68fa24674SBarry Smith /*
78fa24674SBarry Smith       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
8312e372aSBarry Smith 
9312e372aSBarry Smith       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
108fa24674SBarry Smith */
11ff6a9541SJacob Faibussowitsch #if 0
12d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol)
13d71ae5a4SJacob Faibussowitsch {
148fa24674SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)mat->data;
15889a8dedSBarry Smith   PetscInt           i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
168fa24674SBarry Smith   const PetscInt    *ai = a->i, *aj = a->j;
178fa24674SBarry Smith   const PetscScalar *aa = a->a;
18ace3abfcSBarry Smith   PetscBool         *done;
19889a8dedSBarry Smith   PetscReal          best, past = 0, future;
203a40ed3dSBarry Smith 
218fa24674SBarry Smith   PetscFunctionBegin;
228fa24674SBarry Smith   /* pick initial row */
238fa24674SBarry Smith   best = -1;
248fa24674SBarry Smith   for (i = 0; i < n; i++) {
2575567043SBarry Smith     future = 0.0;
268fa24674SBarry Smith     for (j = ai[i]; j < ai[i + 1]; j++) {
27db4deed7SKarl Rupp       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
28db4deed7SKarl Rupp       else past = PetscAbsScalar(aa[j]);
298fa24674SBarry Smith     }
308fa24674SBarry Smith     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
318fa24674SBarry Smith     if (past / future > best) {
328fa24674SBarry Smith       best    = past / future;
338fa24674SBarry Smith       current = i;
348fa24674SBarry Smith     }
358fa24674SBarry Smith   }
368fa24674SBarry Smith 
379566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &done));
389566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(done, n));
399566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &order));
408fa24674SBarry Smith   order[0] = current;
418fa24674SBarry Smith   for (i = 0; i < n - 1; i++) {
428fa24674SBarry Smith     done[current] = PETSC_TRUE;
438fa24674SBarry Smith     best          = -1;
448fa24674SBarry Smith     /* loop over all neighbors of current pivot */
458fa24674SBarry Smith     for (j = ai[current]; j < ai[current + 1]; j++) {
468fa24674SBarry Smith       jj = aj[j];
478fa24674SBarry Smith       if (done[jj]) continue;
488fa24674SBarry Smith       /* loop over columns of potential next row computing weights for below and above diagonal */
498fa24674SBarry Smith       past = future = 0.0;
508fa24674SBarry Smith       for (k = ai[jj]; k < ai[jj + 1]; k++) {
518fa24674SBarry Smith         kk = aj[k];
528fa24674SBarry Smith         if (done[kk]) past += PetscAbsScalar(aa[k]);
538fa24674SBarry Smith         else if (kk != jj) future += PetscAbsScalar(aa[k]);
548fa24674SBarry Smith       }
558fa24674SBarry Smith       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
568fa24674SBarry Smith       if (past / future > best) {
578fa24674SBarry Smith         best       = past / future;
588fa24674SBarry Smith         newcurrent = jj;
598fa24674SBarry Smith       }
608fa24674SBarry Smith     }
618fa24674SBarry Smith     if (best == -1) { /* no neighbors to select from so select best of all that remain */
628fa24674SBarry Smith       best = -1;
638fa24674SBarry Smith       for (k = 0; k < n; k++) {
648fa24674SBarry Smith         if (done[k]) continue;
6575567043SBarry Smith         future = 0.0;
6675567043SBarry Smith         past   = 0.0;
678fa24674SBarry Smith         for (j = ai[k]; j < ai[k + 1]; j++) {
688fa24674SBarry Smith           kk = aj[j];
698fa24674SBarry Smith           if (done[kk]) past += PetscAbsScalar(aa[j]);
708fa24674SBarry Smith           else if (kk != k) future += PetscAbsScalar(aa[j]);
718fa24674SBarry Smith         }
728fa24674SBarry Smith         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
738fa24674SBarry Smith         if (past / future > best) {
748fa24674SBarry Smith           best       = past / future;
758fa24674SBarry Smith           newcurrent = k;
768fa24674SBarry Smith         }
778fa24674SBarry Smith       }
788fa24674SBarry Smith     }
7908401ef6SPierre Jolivet     PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current");
808fa24674SBarry Smith     current      = newcurrent;
818fa24674SBarry Smith     order[i + 1] = current;
828fa24674SBarry Smith   }
839566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow));
848fa24674SBarry Smith   *icol = *irow;
859566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)*irow));
869566063dSJacob Faibussowitsch   PetscCall(PetscFree(done));
879566063dSJacob Faibussowitsch   PetscCall(PetscFree(order));
883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
893b2fbd54SBarry Smith }
90ff6a9541SJacob Faibussowitsch #endif
913b2fbd54SBarry Smith 
92d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
93d71ae5a4SJacob Faibussowitsch {
944ac6704cSBarry Smith   PetscFunctionBegin;
954ac6704cSBarry Smith   *type = MATSOLVERPETSC;
963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
974ac6704cSBarry Smith }
984ac6704cSBarry Smith 
99d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
100d71ae5a4SJacob Faibussowitsch {
101d0f46423SBarry Smith   PetscInt n = A->rmap->n;
102b24902e0SBarry Smith 
103b24902e0SBarry Smith   PetscFunctionBegin;
104891bceaeSHong Zhang #if defined(PETSC_USE_COMPLEX)
10503e5aca4SStefano Zampini   if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
10603e5aca4SStefano Zampini     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n"));
10703e5aca4SStefano Zampini     *B = NULL;
10803e5aca4SStefano Zampini     PetscFunctionReturn(PETSC_SUCCESS);
10903e5aca4SStefano Zampini   }
110891bceaeSHong Zhang #endif
11103e5aca4SStefano Zampini 
1129566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
1139566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*B, n, n, n, n));
114599ef60dSHong Zhang   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
1159566063dSJacob Faibussowitsch     PetscCall(MatSetType(*B, MATSEQAIJ));
1162205254eSKarl Rupp 
11735233d99SShri Abhyankar     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
11835233d99SShri Abhyankar     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
1192205254eSKarl Rupp 
1209566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizesFromMats(*B, A, A));
1219566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
1229566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
1239566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
124b24902e0SBarry Smith   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
1259566063dSJacob Faibussowitsch     PetscCall(MatSetType(*B, MATSEQSBAIJ));
1269566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
1272205254eSKarl Rupp 
12835233d99SShri Abhyankar     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
12935233d99SShri Abhyankar     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
1309566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
1319566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
132e32f2f54SBarry Smith   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
133d5f3da31SBarry Smith   (*B)->factortype = ftype;
13400c67f3bSHong Zhang 
1359566063dSJacob Faibussowitsch   PetscCall(PetscFree((*B)->solvertype));
1369566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
137f73b0415SBarry Smith   (*B)->canuseordering = PETSC_TRUE;
1389566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
1393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
140b24902e0SBarry Smith }
141b24902e0SBarry Smith 
142ff6a9541SJacob Faibussowitsch #if 0
143ff6a9541SJacob Faibussowitsch // currently unused
144ff6a9541SJacob Faibussowitsch static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
145d71ae5a4SJacob Faibussowitsch {
146416022c9SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
147289bc588SBarry Smith   IS                 isicol;
1485d0c19d7SBarry Smith   const PetscInt    *r, *ic;
1495d0c19d7SBarry Smith   PetscInt           i, n = A->rmap->n, *ai = a->i, *aj = a->j;
1501393bc97SHong Zhang   PetscInt          *bi, *bj, *ajtmp;
1511393bc97SHong Zhang   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
1529e046878SBarry Smith   PetscReal          f;
1534875ba38SHong Zhang   PetscInt           nlnk, *lnk, k, **bi_ptr;
1540298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1551393bc97SHong Zhang   PetscBT            lnkbt;
156ece542f9SHong Zhang   PetscBool          missing;
157289bc588SBarry Smith 
1583a40ed3dSBarry Smith   PetscFunctionBegin;
15908401ef6SPierre Jolivet   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
1609566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
16128b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
162ece542f9SHong Zhang 
1639566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1649566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
1659566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
166289bc588SBarry Smith 
167289bc588SBarry Smith   /* get new row pointers */
1689f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(n + 1,sizeof(PetscInt),(void **)&bi));
1691393bc97SHong Zhang   bi[0] = 0;
1701393bc97SHong Zhang 
1711393bc97SHong Zhang   /* bdiag is location of diagonal in factor */
1729566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));
1731393bc97SHong Zhang   bdiag[0] = 0;
1741393bc97SHong Zhang 
1751393bc97SHong Zhang   /* linked list for storing column indices of the active row */
1761393bc97SHong Zhang   nlnk = n + 1;
1779566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
1789566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
1791393bc97SHong Zhang 
1801393bc97SHong Zhang   /* initial FreeSpace size is f*(ai[n]+1) */
181d3d32019SBarry Smith   f = info->fill;
182aa91b3e7SRichard Tran Mills   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
1839566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1841393bc97SHong Zhang   current_space = free_space;
185289bc588SBarry Smith 
186289bc588SBarry Smith   for (i = 0; i < n; i++) {
1871393bc97SHong Zhang     /* copy previous fill into linked list */
188289bc588SBarry Smith     nzi   = 0;
1891393bc97SHong Zhang     nnz   = ai[r[i] + 1] - ai[r[i]];
1901393bc97SHong Zhang     ajtmp = aj + ai[r[i]];
1919566063dSJacob Faibussowitsch     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
1921393bc97SHong Zhang     nzi += nlnk;
1931393bc97SHong Zhang 
1941393bc97SHong Zhang     /* add pivot rows into linked list */
1951393bc97SHong Zhang     row = lnk[n];
1961393bc97SHong Zhang     while (row < i) {
197d1170560SHong Zhang       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
198d1170560SHong Zhang       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
1999566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
2001393bc97SHong Zhang       nzi += nlnk;
2011393bc97SHong Zhang       row = lnk[row];
202289bc588SBarry Smith     }
2031393bc97SHong Zhang     bi[i + 1] = bi[i] + nzi;
2041393bc97SHong Zhang     im[i]     = nzi;
2051393bc97SHong Zhang 
2061393bc97SHong Zhang     /* mark bdiag */
2071393bc97SHong Zhang     nzbd = 0;
2081393bc97SHong Zhang     nnz  = nzi;
2091393bc97SHong Zhang     k    = lnk[n];
2101393bc97SHong Zhang     while (nnz-- && k < i) {
2111393bc97SHong Zhang       nzbd++;
2121393bc97SHong Zhang       k = lnk[k];
2131393bc97SHong Zhang     }
2141393bc97SHong Zhang     bdiag[i] = bi[i] + nzbd;
2151393bc97SHong Zhang 
2161393bc97SHong Zhang     /* if free space is not available, make more free space */
2171393bc97SHong Zhang     if (current_space->local_remaining < nzi) {
218f91af8c7SBarry Smith       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
2199566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
2201393bc97SHong Zhang       reallocs++;
2211393bc97SHong Zhang     }
2221393bc97SHong Zhang 
2231393bc97SHong Zhang     /* copy data into free space, then initialize lnk */
2249566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
2252205254eSKarl Rupp 
2261393bc97SHong Zhang     bi_ptr[i] = current_space->array;
2271393bc97SHong Zhang     current_space->array += nzi;
2281393bc97SHong Zhang     current_space->local_used += nzi;
2291393bc97SHong Zhang     current_space->local_remaining -= nzi;
230289bc588SBarry Smith   }
2316cf91177SBarry Smith   #if defined(PETSC_USE_INFO)
232464e8d44SSatish Balay   if (ai[n] != 0) {
2331393bc97SHong Zhang     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2349566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
2359566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2369566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
2379566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "for best performance.\n"));
23805bf559cSSatish Balay   } else {
2399566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
24005bf559cSSatish Balay   }
24163ba0a88SBarry Smith   #endif
2422fb3ed76SBarry Smith 
2439566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
2449566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
2451987afe7SBarry Smith 
2461393bc97SHong Zhang   /* destroy list of free space and other temporary array(s) */
2479f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscInt),(void **)&bj));
2489566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
2499566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
2509566063dSJacob Faibussowitsch   PetscCall(PetscFree2(bi_ptr, im));
251289bc588SBarry Smith 
252289bc588SBarry Smith   /* put together the new matrix */
2539566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
25457508eceSPierre Jolivet   b = (Mat_SeqAIJ *)B->data;
255e6b907acSBarry Smith   b->free_ij      = PETSC_TRUE;
2569f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscScalar),(void **)&b->a));
2579f0612e4SBarry Smith   b->free_a       = PETSC_TRUE;
2581393bc97SHong Zhang   b->j    = bj;
2591393bc97SHong Zhang   b->i    = bi;
2601393bc97SHong Zhang   b->diag = bdiag;
261f4259b30SLisandro Dalcin   b->ilen = NULL;
262f4259b30SLisandro Dalcin   b->imax = NULL;
263416022c9SBarry Smith   b->row  = isrow;
264416022c9SBarry Smith   b->col  = iscol;
2659566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
2669566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
26782bf6240SBarry Smith   b->icol = isicol;
2689566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
2691393bc97SHong Zhang 
2701393bc97SHong Zhang   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
2711393bc97SHong Zhang   b->maxnz = b->nz = bi[n];
2727fd98bd6SLois Curfman McInnes 
27357508eceSPierre Jolivet   B->factortype            = MAT_FACTOR_LU;
27457508eceSPierre Jolivet   B->info.factor_mallocs   = reallocs;
27557508eceSPierre Jolivet   B->info.fill_ratio_given = f;
276703deb49SSatish Balay 
2779f7d0b68SHong Zhang   if (ai[n]) {
27857508eceSPierre Jolivet     B->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
279eea2913aSSatish Balay   } else {
28057508eceSPierre Jolivet     B->info.fill_ratio_needed = 0.0;
281eea2913aSSatish Balay   }
28257508eceSPierre Jolivet   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
2834d12350bSJunchao Zhang   if (a->inode.size_csr) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2843ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
285289bc588SBarry Smith }
286ff6a9541SJacob Faibussowitsch #endif
2871393bc97SHong Zhang 
288d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
289d71ae5a4SJacob Faibussowitsch {
290ce3d78c0SShri Abhyankar   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
291ce3d78c0SShri Abhyankar   IS                 isicol;
2928758e1faSBarry Smith   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
2938758e1faSBarry Smith   PetscInt           i, n = A->rmap->n;
2948758e1faSBarry Smith   PetscInt          *bi, *bj;
295ce3d78c0SShri Abhyankar   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
296ce3d78c0SShri Abhyankar   PetscReal          f;
297ce3d78c0SShri Abhyankar   PetscInt           nlnk, *lnk, k, **bi_ptr;
2980298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
299ce3d78c0SShri Abhyankar   PetscBT            lnkbt;
300ece542f9SHong Zhang   PetscBool          missing;
301ce3d78c0SShri Abhyankar 
302ce3d78c0SShri Abhyankar   PetscFunctionBegin;
30308401ef6SPierre Jolivet   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
3049566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
30528b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
306ece542f9SHong Zhang 
3079566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
3089566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
3099566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
310ce3d78c0SShri Abhyankar 
3111bfa06eaSJed Brown   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
3129f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
3139566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));
314a6df321fSShri Abhyankar   bi[0] = bdiag[0] = 0;
3159b48462bSShri Abhyankar 
3169b48462bSShri Abhyankar   /* linked list for storing column indices of the active row */
3179b48462bSShri Abhyankar   nlnk = n + 1;
3189566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
3199b48462bSShri Abhyankar 
3209566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
3219b48462bSShri Abhyankar 
3229b48462bSShri Abhyankar   /* initial FreeSpace size is f*(ai[n]+1) */
3239b48462bSShri Abhyankar   f = info->fill;
324aa91b3e7SRichard Tran Mills   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
3259566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
3269b48462bSShri Abhyankar   current_space = free_space;
3279b48462bSShri Abhyankar 
3289b48462bSShri Abhyankar   for (i = 0; i < n; i++) {
3299b48462bSShri Abhyankar     /* copy previous fill into linked list */
3309b48462bSShri Abhyankar     nzi   = 0;
3319b48462bSShri Abhyankar     nnz   = ai[r[i] + 1] - ai[r[i]];
3329b48462bSShri Abhyankar     ajtmp = aj + ai[r[i]];
3339566063dSJacob Faibussowitsch     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
3349b48462bSShri Abhyankar     nzi += nlnk;
3359b48462bSShri Abhyankar 
3369b48462bSShri Abhyankar     /* add pivot rows into linked list */
3379b48462bSShri Abhyankar     row = lnk[n];
3389b48462bSShri Abhyankar     while (row < i) {
3399b48462bSShri Abhyankar       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
3409b48462bSShri Abhyankar       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
3419566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
3429b48462bSShri Abhyankar       nzi += nlnk;
3439b48462bSShri Abhyankar       row = lnk[row];
3449b48462bSShri Abhyankar     }
3459b48462bSShri Abhyankar     bi[i + 1] = bi[i] + nzi;
3469b48462bSShri Abhyankar     im[i]     = nzi;
3479b48462bSShri Abhyankar 
3489b48462bSShri Abhyankar     /* mark bdiag */
3499b48462bSShri Abhyankar     nzbd = 0;
3509b48462bSShri Abhyankar     nnz  = nzi;
3519b48462bSShri Abhyankar     k    = lnk[n];
3529b48462bSShri Abhyankar     while (nnz-- && k < i) {
3539b48462bSShri Abhyankar       nzbd++;
3549b48462bSShri Abhyankar       k = lnk[k];
3559b48462bSShri Abhyankar     }
3569b48462bSShri Abhyankar     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
3579b48462bSShri Abhyankar 
3589b48462bSShri Abhyankar     /* if free space is not available, make more free space */
3599b48462bSShri Abhyankar     if (current_space->local_remaining < nzi) {
3602f18eb33SBarry Smith       /* estimated additional space needed */
3611df4278eSBarry Smith       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
3629566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
3639b48462bSShri Abhyankar       reallocs++;
3649b48462bSShri Abhyankar     }
3659b48462bSShri Abhyankar 
3669b48462bSShri Abhyankar     /* copy data into free space, then initialize lnk */
3679566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
3682205254eSKarl Rupp 
3699b48462bSShri Abhyankar     bi_ptr[i] = current_space->array;
3709b48462bSShri Abhyankar     current_space->array += nzi;
3719b48462bSShri Abhyankar     current_space->local_used += nzi;
3729b48462bSShri Abhyankar     current_space->local_remaining -= nzi;
3739b48462bSShri Abhyankar   }
3749b48462bSShri Abhyankar 
3759566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
3769566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
3779b48462bSShri Abhyankar 
3789263d837SHong Zhang   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
379*84648c2dSPierre Jolivet   PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj));
3809566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
3819566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
3829566063dSJacob Faibussowitsch   PetscCall(PetscFree2(bi_ptr, im));
3839b48462bSShri Abhyankar 
3849b48462bSShri Abhyankar   /* put together the new matrix */
3859566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
38657508eceSPierre Jolivet   b          = (Mat_SeqAIJ *)B->data;
3879b48462bSShri Abhyankar   b->free_ij = PETSC_TRUE;
3889f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
3899f0612e4SBarry Smith   b->free_a = PETSC_TRUE;
3909b48462bSShri Abhyankar   b->j      = bj;
3919b48462bSShri Abhyankar   b->i      = bi;
3929b48462bSShri Abhyankar   b->diag   = bdiag;
393f4259b30SLisandro Dalcin   b->ilen   = NULL;
394f4259b30SLisandro Dalcin   b->imax   = NULL;
3959b48462bSShri Abhyankar   b->row    = isrow;
3969b48462bSShri Abhyankar   b->col    = iscol;
3979566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
3989566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
3999b48462bSShri Abhyankar   b->icol = isicol;
400*84648c2dSPierre Jolivet   PetscCall(PetscMalloc1(n, &b->solve_work));
4019b48462bSShri Abhyankar 
4029b48462bSShri Abhyankar   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
4039b48462bSShri Abhyankar   b->maxnz = b->nz = bdiag[0] + 1;
4042205254eSKarl Rupp 
405d5f3da31SBarry Smith   B->factortype            = MAT_FACTOR_LU;
406f284f12aSHong Zhang   B->info.factor_mallocs   = reallocs;
407f284f12aSHong Zhang   B->info.fill_ratio_given = f;
4089b48462bSShri Abhyankar 
4099b48462bSShri Abhyankar   if (ai[n]) {
410f284f12aSHong Zhang     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
4119b48462bSShri Abhyankar   } else {
412f284f12aSHong Zhang     B->info.fill_ratio_needed = 0.0;
4139b48462bSShri Abhyankar   }
4149263d837SHong Zhang #if defined(PETSC_USE_INFO)
4159263d837SHong Zhang   if (ai[n] != 0) {
4169263d837SHong Zhang     PetscReal af = B->info.fill_ratio_needed;
4179566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
4189566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
4199566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
4209566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "for best performance.\n"));
4219263d837SHong Zhang   } else {
4229566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
4239263d837SHong Zhang   }
4249263d837SHong Zhang #endif
42535233d99SShri Abhyankar   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
4264d12350bSJunchao Zhang   if (a->inode.size_csr) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
4279566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
4283ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4299b48462bSShri Abhyankar }
4309b48462bSShri Abhyankar 
4315b5bc046SBarry Smith /*
4325b5bc046SBarry Smith     Trouble in factorization, should we dump the original matrix?
4335b5bc046SBarry Smith */
434d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorDumpMatrix(Mat A)
435d71ae5a4SJacob Faibussowitsch {
436ace3abfcSBarry Smith   PetscBool flg = PETSC_FALSE;
4375b5bc046SBarry Smith 
4385b5bc046SBarry Smith   PetscFunctionBegin;
4399566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
4405b5bc046SBarry Smith   if (flg) {
4415b5bc046SBarry Smith     PetscViewer viewer;
4425b5bc046SBarry Smith     char        filename[PETSC_MAX_PATH_LEN];
4435b5bc046SBarry Smith 
4449566063dSJacob Faibussowitsch     PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
4459566063dSJacob Faibussowitsch     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
4469566063dSJacob Faibussowitsch     PetscCall(MatView(A, viewer));
4479566063dSJacob Faibussowitsch     PetscCall(PetscViewerDestroy(&viewer));
4485b5bc046SBarry Smith   }
4493ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4505b5bc046SBarry Smith }
4515b5bc046SBarry Smith 
452d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
453d71ae5a4SJacob Faibussowitsch {
454c9c72f8fSHong Zhang   Mat              C = B;
455c9c72f8fSHong Zhang   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
456c9c72f8fSHong Zhang   IS               isrow = b->row, isicol = b->icol;
457c9c72f8fSHong Zhang   const PetscInt  *r, *ic, *ics;
458bbd65245SShri Abhyankar   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
459bbd65245SShri Abhyankar   PetscInt         i, j, k, nz, nzL, row, *pj;
460bbd65245SShri Abhyankar   const PetscInt  *ajtmp, *bjtmp;
461bbd65245SShri Abhyankar   MatScalar       *rtmp, *pc, multiplier, *pv;
462b65878eeSJunchao Zhang   const MatScalar *aa, *v;
463b65878eeSJunchao Zhang   MatScalar       *ba;
464ace3abfcSBarry Smith   PetscBool        row_identity, col_identity;
465d90ac83dSHong Zhang   FactorShiftCtx   sctx;
4664f81c4b7SBarry Smith   const PetscInt  *ddiag;
467d4ad06f7SHong Zhang   PetscReal        rs;
468d4ad06f7SHong Zhang   MatScalar        d;
469d4ad06f7SHong Zhang 
470c9c72f8fSHong Zhang   PetscFunctionBegin;
471b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
472b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
473d6e5152cSHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
4749566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
475d4ad06f7SHong Zhang 
476f4db908eSBarry Smith   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
477d4ad06f7SHong Zhang     ddiag          = a->diag;
478d4ad06f7SHong Zhang     sctx.shift_top = info->zeropivot;
479d4ad06f7SHong Zhang     for (i = 0; i < n; i++) {
480d4ad06f7SHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
481d4ad06f7SHong Zhang       d  = (aa)[ddiag[i]];
482d4ad06f7SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
483d4ad06f7SHong Zhang       v  = aa + ai[i];
484d4ad06f7SHong Zhang       nz = ai[i + 1] - ai[i];
4852205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
486d4ad06f7SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
487d4ad06f7SHong Zhang     }
488d4ad06f7SHong Zhang     sctx.shift_top *= 1.1;
489d4ad06f7SHong Zhang     sctx.nshift_max = 5;
490d4ad06f7SHong Zhang     sctx.shift_lo   = 0.;
491d4ad06f7SHong Zhang     sctx.shift_hi   = 1.;
492d4ad06f7SHong Zhang   }
493d4ad06f7SHong Zhang 
4949566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
4959566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
4969566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &rtmp));
497c9c72f8fSHong Zhang   ics = ic;
498c9c72f8fSHong Zhang 
499d4ad06f7SHong Zhang   do {
50007b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
501c9c72f8fSHong Zhang     for (i = 0; i < n; i++) {
502c9c72f8fSHong Zhang       /* zero rtmp */
503c9c72f8fSHong Zhang       /* L part */
504c9c72f8fSHong Zhang       nz    = bi[i + 1] - bi[i];
505c9c72f8fSHong Zhang       bjtmp = bj + bi[i];
506c9c72f8fSHong Zhang       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
507c9c72f8fSHong Zhang 
508c9c72f8fSHong Zhang       /* U part */
50962fbd6afSShri Abhyankar       nz    = bdiag[i] - bdiag[i + 1];
51062fbd6afSShri Abhyankar       bjtmp = bj + bdiag[i + 1] + 1;
511813a1f2bSShri Abhyankar       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
512813a1f2bSShri Abhyankar 
513813a1f2bSShri Abhyankar       /* load in initial (unfactored row) */
514813a1f2bSShri Abhyankar       nz    = ai[r[i] + 1] - ai[r[i]];
515813a1f2bSShri Abhyankar       ajtmp = aj + ai[r[i]];
516813a1f2bSShri Abhyankar       v     = aa + ai[r[i]];
517ad540459SPierre Jolivet       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
518813a1f2bSShri Abhyankar       /* ZeropivotApply() */
51998a93161SHong Zhang       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
520813a1f2bSShri Abhyankar 
521813a1f2bSShri Abhyankar       /* elimination */
522813a1f2bSShri Abhyankar       bjtmp = bj + bi[i];
523813a1f2bSShri Abhyankar       row   = *bjtmp++;
524813a1f2bSShri Abhyankar       nzL   = bi[i + 1] - bi[i];
525f268cba8SShri Abhyankar       for (k = 0; k < nzL; k++) {
526813a1f2bSShri Abhyankar         pc = rtmp + row;
527813a1f2bSShri Abhyankar         if (*pc != 0.0) {
528b65878eeSJunchao Zhang           pv         = ba + bdiag[row];
529813a1f2bSShri Abhyankar           multiplier = *pc * (*pv);
530813a1f2bSShri Abhyankar           *pc        = multiplier;
5312205254eSKarl Rupp 
53262fbd6afSShri Abhyankar           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
533b65878eeSJunchao Zhang           pv = ba + bdiag[row + 1] + 1;
53462fbd6afSShri Abhyankar           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
5352205254eSKarl Rupp 
536813a1f2bSShri Abhyankar           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
5379566063dSJacob Faibussowitsch           PetscCall(PetscLogFlops(1 + 2.0 * nz));
538813a1f2bSShri Abhyankar         }
539f268cba8SShri Abhyankar         row = *bjtmp++;
540813a1f2bSShri Abhyankar       }
541813a1f2bSShri Abhyankar 
542813a1f2bSShri Abhyankar       /* finished row so stick it into b->a */
543813a1f2bSShri Abhyankar       rs = 0.0;
544813a1f2bSShri Abhyankar       /* L part */
545b65878eeSJunchao Zhang       pv = ba + bi[i];
546813a1f2bSShri Abhyankar       pj = b->j + bi[i];
547813a1f2bSShri Abhyankar       nz = bi[i + 1] - bi[i];
548813a1f2bSShri Abhyankar       for (j = 0; j < nz; j++) {
5499371c9d4SSatish Balay         pv[j] = rtmp[pj[j]];
5509371c9d4SSatish Balay         rs += PetscAbsScalar(pv[j]);
551813a1f2bSShri Abhyankar       }
552813a1f2bSShri Abhyankar 
553813a1f2bSShri Abhyankar       /* U part */
554b65878eeSJunchao Zhang       pv = ba + bdiag[i + 1] + 1;
55562fbd6afSShri Abhyankar       pj = b->j + bdiag[i + 1] + 1;
55662fbd6afSShri Abhyankar       nz = bdiag[i] - bdiag[i + 1] - 1;
557813a1f2bSShri Abhyankar       for (j = 0; j < nz; j++) {
5589371c9d4SSatish Balay         pv[j] = rtmp[pj[j]];
5599371c9d4SSatish Balay         rs += PetscAbsScalar(pv[j]);
560813a1f2bSShri Abhyankar       }
561813a1f2bSShri Abhyankar 
562813a1f2bSShri Abhyankar       sctx.rs = rs;
563813a1f2bSShri Abhyankar       sctx.pv = rtmp[i];
5649566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
56507b50cabSHong Zhang       if (sctx.newshift) break; /* break for-loop */
56607b50cabSHong Zhang       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
567813a1f2bSShri Abhyankar 
568a5b23f4aSJose E. Roman       /* Mark diagonal and invert diagonal for simpler triangular solves */
569b65878eeSJunchao Zhang       pv  = ba + bdiag[i];
570813a1f2bSShri Abhyankar       *pv = 1.0 / rtmp[i];
571813a1f2bSShri Abhyankar 
572813a1f2bSShri Abhyankar     } /* endof for (i=0; i<n; i++) { */
573813a1f2bSShri Abhyankar 
5748ff23777SHong Zhang     /* MatPivotRefine() */
57507b50cabSHong Zhang     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
576813a1f2bSShri Abhyankar       /*
577813a1f2bSShri Abhyankar        * if no shift in this attempt & shifting & started shifting & can refine,
578813a1f2bSShri Abhyankar        * then try lower shift
579813a1f2bSShri Abhyankar        */
580813a1f2bSShri Abhyankar       sctx.shift_hi       = sctx.shift_fraction;
581813a1f2bSShri Abhyankar       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
582813a1f2bSShri Abhyankar       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
58307b50cabSHong Zhang       sctx.newshift       = PETSC_TRUE;
584813a1f2bSShri Abhyankar       sctx.nshift++;
585813a1f2bSShri Abhyankar     }
58607b50cabSHong Zhang   } while (sctx.newshift);
587813a1f2bSShri Abhyankar 
588b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
589b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));
590b65878eeSJunchao Zhang 
5919566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
5929566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
5939566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
594d3ac4fa3SBarry Smith 
5959566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
5969566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isicol, &col_identity));
5974d12350bSJunchao Zhang   if (b->inode.size_csr) {
598abb87a52SBarry Smith     C->ops->solve = MatSolve_SeqAIJ_Inode;
599abb87a52SBarry Smith   } else if (row_identity && col_identity) {
60035233d99SShri Abhyankar     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
601813a1f2bSShri Abhyankar   } else {
60235233d99SShri Abhyankar     C->ops->solve = MatSolve_SeqAIJ;
603813a1f2bSShri Abhyankar   }
60435233d99SShri Abhyankar   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
60535233d99SShri Abhyankar   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
60635233d99SShri Abhyankar   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
60735233d99SShri Abhyankar   C->ops->matsolve          = MatMatSolve_SeqAIJ;
608a3d9026eSPierre Jolivet   C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ;
609813a1f2bSShri Abhyankar   C->assembled              = PETSC_TRUE;
610813a1f2bSShri Abhyankar   C->preallocated           = PETSC_TRUE;
6112205254eSKarl Rupp 
6129566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->cmap->n));
613813a1f2bSShri Abhyankar 
61414d2772eSHong Zhang   /* MatShiftView(A,info,&sctx) */
615813a1f2bSShri Abhyankar   if (sctx.nshift) {
616f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
6179566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
618f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
6199566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
620f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
6219566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
622813a1f2bSShri Abhyankar     }
623813a1f2bSShri Abhyankar   }
6243ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
625813a1f2bSShri Abhyankar }
626813a1f2bSShri Abhyankar 
627ff6a9541SJacob Faibussowitsch static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
628ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
629ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);
630ff6a9541SJacob Faibussowitsch 
631d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
632d71ae5a4SJacob Faibussowitsch {
633719d5645SBarry Smith   Mat              C = B;
634416022c9SBarry Smith   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
63582bf6240SBarry Smith   IS               isrow = b->row, isicol = b->icol;
6365d0c19d7SBarry Smith   const PetscInt  *r, *ic, *ics;
637d278edc7SBarry Smith   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
638d278edc7SBarry Smith   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
639d278edc7SBarry Smith   const PetscInt  *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
640d278edc7SBarry Smith   MatScalar       *pv, *rtmp, *pc, multiplier, d;
641b65878eeSJunchao Zhang   const MatScalar *v, *aa;
642b65878eeSJunchao Zhang   MatScalar       *ba;
64333f9893dSHong Zhang   PetscReal        rs = 0.0;
644d90ac83dSHong Zhang   FactorShiftCtx   sctx;
6454f81c4b7SBarry Smith   const PetscInt  *ddiag;
646ace3abfcSBarry Smith   PetscBool        row_identity, col_identity;
647289bc588SBarry Smith 
6483a40ed3dSBarry Smith   PetscFunctionBegin;
649b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
650b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
651504a3fadSHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
6529566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
653ada7143aSSatish Balay 
654f4db908eSBarry Smith   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
65542898933SHong Zhang     ddiag          = a->diag;
6569f7d0b68SHong Zhang     sctx.shift_top = info->zeropivot;
6576cc28720Svictorle     for (i = 0; i < n; i++) {
6589f95998fSHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
6599f7d0b68SHong Zhang       d  = (aa)[ddiag[i]];
6601a890ab1SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
6619f7d0b68SHong Zhang       v  = aa + ai[i];
6629f7d0b68SHong Zhang       nz = ai[i + 1] - ai[i];
6632205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
6641a890ab1SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
6656cc28720Svictorle     }
666118f5789SBarry Smith     sctx.shift_top *= 1.1;
667d2276718SHong Zhang     sctx.nshift_max = 5;
668d2276718SHong Zhang     sctx.shift_lo   = 0.;
669d2276718SHong Zhang     sctx.shift_hi   = 1.;
670a0a356efSHong Zhang   }
671a0a356efSHong Zhang 
6729566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
6739566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
6749566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &rtmp));
675504a3fadSHong Zhang   ics = ic;
676504a3fadSHong Zhang 
677085256b3SBarry Smith   do {
67807b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
679289bc588SBarry Smith     for (i = 0; i < n; i++) {
680b3bf805bSHong Zhang       nz    = bi[i + 1] - bi[i];
681b3bf805bSHong Zhang       bjtmp = bj + bi[i];
6829f7d0b68SHong Zhang       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
683289bc588SBarry Smith 
684289bc588SBarry Smith       /* load in initial (unfactored row) */
6859f7d0b68SHong Zhang       nz    = ai[r[i] + 1] - ai[r[i]];
6869f7d0b68SHong Zhang       ajtmp = aj + ai[r[i]];
6879f7d0b68SHong Zhang       v     = aa + ai[r[i]];
688ad540459SPierre Jolivet       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
689d2276718SHong Zhang       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
690289bc588SBarry Smith 
691b3bf805bSHong Zhang       row = *bjtmp++;
692f2582111SSatish Balay       while (row < i) {
6938c37ef55SBarry Smith         pc = rtmp + row;
6948c37ef55SBarry Smith         if (*pc != 0.0) {
695b65878eeSJunchao Zhang           pv         = ba + diag_offset[row];
696010a20caSHong Zhang           pj         = b->j + diag_offset[row] + 1;
69735aab85fSBarry Smith           multiplier = *pc / *pv++;
6988c37ef55SBarry Smith           *pc        = multiplier;
699b3bf805bSHong Zhang           nz         = bi[row + 1] - diag_offset[row] - 1;
7009f7d0b68SHong Zhang           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
7019566063dSJacob Faibussowitsch           PetscCall(PetscLogFlops(1 + 2.0 * nz));
702289bc588SBarry Smith         }
703b3bf805bSHong Zhang         row = *bjtmp++;
704289bc588SBarry Smith       }
705416022c9SBarry Smith       /* finished row so stick it into b->a */
706b65878eeSJunchao Zhang       pv   = ba + bi[i];
707b3bf805bSHong Zhang       pj   = b->j + bi[i];
708b3bf805bSHong Zhang       nz   = bi[i + 1] - bi[i];
709b3bf805bSHong Zhang       diag = diag_offset[i] - bi[i];
710e57ff13aSBarry Smith       rs   = 0.0;
711d3d32019SBarry Smith       for (j = 0; j < nz; j++) {
7129f7d0b68SHong Zhang         pv[j] = rtmp[pj[j]];
7139f7d0b68SHong Zhang         rs += PetscAbsScalar(pv[j]);
714d3d32019SBarry Smith       }
715e57ff13aSBarry Smith       rs -= PetscAbsScalar(pv[diag]);
716d2276718SHong Zhang 
717d2276718SHong Zhang       sctx.rs = rs;
718d2276718SHong Zhang       sctx.pv = pv[diag];
7199566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
72007b50cabSHong Zhang       if (sctx.newshift) break;
721504a3fadSHong Zhang       pv[diag] = sctx.pv;
72235aab85fSBarry Smith     }
723d2276718SHong Zhang 
72407b50cabSHong Zhang     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
7256cc28720Svictorle       /*
7266c037d1bSvictorle        * if no shift in this attempt & shifting & started shifting & can refine,
7276cc28720Svictorle        * then try lower shift
7286cc28720Svictorle        */
7290481f469SBarry Smith       sctx.shift_hi       = sctx.shift_fraction;
7300481f469SBarry Smith       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
7310481f469SBarry Smith       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
73207b50cabSHong Zhang       sctx.newshift       = PETSC_TRUE;
733d2276718SHong Zhang       sctx.nshift++;
7346cc28720Svictorle     }
73507b50cabSHong Zhang   } while (sctx.newshift);
7360f11f9aeSSatish Balay 
737a5b23f4aSJose E. Roman   /* invert diagonal entries for simpler triangular solves */
738b65878eeSJunchao Zhang   for (i = 0; i < n; i++) ba[diag_offset[i]] = 1.0 / ba[diag_offset[i]];
7399566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
7409566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
7419566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
742b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
743b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));
744d3ac4fa3SBarry Smith 
7459566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
7469566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isicol, &col_identity));
7478d8c7c43SBarry Smith   if (row_identity && col_identity) {
748ad04f41aSHong Zhang     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
7498d8c7c43SBarry Smith   } else {
750ad04f41aSHong Zhang     C->ops->solve = MatSolve_SeqAIJ_inplace;
751db4efbfdSBarry Smith   }
752ad04f41aSHong Zhang   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
753ad04f41aSHong Zhang   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
754ad04f41aSHong Zhang   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
755ad04f41aSHong Zhang   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
756a3d9026eSPierre Jolivet   C->ops->matsolvetranspose = NULL;
7572205254eSKarl Rupp 
758c456f294SBarry Smith   C->assembled    = PETSC_TRUE;
7595c9eb25fSBarry Smith   C->preallocated = PETSC_TRUE;
7602205254eSKarl Rupp 
7619566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->cmap->n));
762d2276718SHong Zhang   if (sctx.nshift) {
763f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
7649566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
765f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
7669566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
7676cc28720Svictorle     }
7680697b6caSHong Zhang   }
76957508eceSPierre Jolivet   C->ops->solve          = MatSolve_SeqAIJ_inplace;
77057508eceSPierre Jolivet   C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
7712205254eSKarl Rupp 
7729566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCheckInode(C));
7733ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
774289bc588SBarry Smith }
7750889b5dcSvictorle 
776ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);
777ff6a9541SJacob Faibussowitsch 
778137fb511SHong Zhang /*
779137fb511SHong Zhang    This routine implements inplace ILU(0) with row or/and column permutations.
780137fb511SHong Zhang    Input:
781137fb511SHong Zhang      A - original matrix
782137fb511SHong Zhang    Output;
783137fb511SHong Zhang      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
784137fb511SHong Zhang          a->j (col index) is permuted by the inverse of colperm, then sorted
785137fb511SHong Zhang          a->a reordered accordingly with a->j
786137fb511SHong Zhang          a->diag (ptr to diagonal elements) is updated.
787137fb511SHong Zhang */
788d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
789d71ae5a4SJacob Faibussowitsch {
790b051339dSHong Zhang   Mat_SeqAIJ     *a     = (Mat_SeqAIJ *)A->data;
791b051339dSHong Zhang   IS              isrow = a->row, isicol = a->icol;
7925d0c19d7SBarry Smith   const PetscInt *r, *ic, *ics;
7935d0c19d7SBarry Smith   PetscInt        i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
7945d0c19d7SBarry Smith   PetscInt       *ajtmp, nz, row;
795b051339dSHong Zhang   PetscInt       *diag = a->diag, nbdiag, *pj;
796a77337e4SBarry Smith   PetscScalar    *rtmp, *pc, multiplier, d;
797504a3fadSHong Zhang   MatScalar      *pv, *v;
798137fb511SHong Zhang   PetscReal       rs;
799d90ac83dSHong Zhang   FactorShiftCtx  sctx;
800b65878eeSJunchao Zhang   MatScalar      *aa, *vtmp;
801137fb511SHong Zhang 
802137fb511SHong Zhang   PetscFunctionBegin;
80308401ef6SPierre Jolivet   PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
804504a3fadSHong Zhang 
805b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArray(A, &aa));
806504a3fadSHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
8079566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
808504a3fadSHong Zhang 
809504a3fadSHong Zhang   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
810504a3fadSHong Zhang     const PetscInt *ddiag = a->diag;
811504a3fadSHong Zhang     sctx.shift_top        = info->zeropivot;
812504a3fadSHong Zhang     for (i = 0; i < n; i++) {
813504a3fadSHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
814504a3fadSHong Zhang       d    = (aa)[ddiag[i]];
815504a3fadSHong Zhang       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
816504a3fadSHong Zhang       vtmp = aa + ai[i];
817504a3fadSHong Zhang       nz   = ai[i + 1] - ai[i];
8182205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
819504a3fadSHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
820504a3fadSHong Zhang     }
821504a3fadSHong Zhang     sctx.shift_top *= 1.1;
822504a3fadSHong Zhang     sctx.nshift_max = 5;
823504a3fadSHong Zhang     sctx.shift_lo   = 0.;
824504a3fadSHong Zhang     sctx.shift_hi   = 1.;
825504a3fadSHong Zhang   }
826504a3fadSHong Zhang 
8279566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
8289566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
8299566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &rtmp));
8309566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(rtmp, n + 1));
831b051339dSHong Zhang   ics = ic;
832137fb511SHong Zhang 
833504a3fadSHong Zhang #if defined(MV)
83475567043SBarry Smith   sctx.shift_top      = 0.;
835e60cf9a0SBarry Smith   sctx.nshift_max     = 0;
83675567043SBarry Smith   sctx.shift_lo       = 0.;
83775567043SBarry Smith   sctx.shift_hi       = 0.;
83875567043SBarry Smith   sctx.shift_fraction = 0.;
839137fb511SHong Zhang 
840f4db908eSBarry Smith   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
84175567043SBarry Smith     sctx.shift_top = 0.;
842137fb511SHong Zhang     for (i = 0; i < n; i++) {
843137fb511SHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
844b65878eeSJunchao Zhang       d  = aa[diag[i]];
845137fb511SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
846b65878eeSJunchao Zhang       v  = aa + ai[i];
847b051339dSHong Zhang       nz = ai[i + 1] - ai[i];
8482205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
849137fb511SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
850137fb511SHong Zhang     }
851137fb511SHong Zhang     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
852137fb511SHong Zhang     sctx.shift_top *= 1.1;
853137fb511SHong Zhang     sctx.nshift_max = 5;
854137fb511SHong Zhang     sctx.shift_lo   = 0.;
855137fb511SHong Zhang     sctx.shift_hi   = 1.;
856137fb511SHong Zhang   }
857137fb511SHong Zhang 
85875567043SBarry Smith   sctx.shift_amount = 0.;
859137fb511SHong Zhang   sctx.nshift       = 0;
860504a3fadSHong Zhang #endif
861504a3fadSHong Zhang 
862137fb511SHong Zhang   do {
86307b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
864137fb511SHong Zhang     for (i = 0; i < n; i++) {
865b051339dSHong Zhang       /* load in initial unfactored row */
866b051339dSHong Zhang       nz    = ai[r[i] + 1] - ai[r[i]];
867b051339dSHong Zhang       ajtmp = aj + ai[r[i]];
868b65878eeSJunchao Zhang       v     = aa + ai[r[i]];
869137fb511SHong Zhang       /* sort permuted ajtmp and values v accordingly */
870b051339dSHong Zhang       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
8719566063dSJacob Faibussowitsch       PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
872137fb511SHong Zhang 
873b051339dSHong Zhang       diag[r[i]] = ai[r[i]];
874137fb511SHong Zhang       for (j = 0; j < nz; j++) {
875137fb511SHong Zhang         rtmp[ajtmp[j]] = v[j];
876b051339dSHong Zhang         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
877137fb511SHong Zhang       }
878137fb511SHong Zhang       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
879137fb511SHong Zhang 
880b051339dSHong Zhang       row = *ajtmp++;
881137fb511SHong Zhang       while (row < i) {
882137fb511SHong Zhang         pc = rtmp + row;
883137fb511SHong Zhang         if (*pc != 0.0) {
884b65878eeSJunchao Zhang           pv = aa + diag[r[row]];
885b051339dSHong Zhang           pj = aj + diag[r[row]] + 1;
886137fb511SHong Zhang 
887137fb511SHong Zhang           multiplier = *pc / *pv++;
888137fb511SHong Zhang           *pc        = multiplier;
889b051339dSHong Zhang           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
890b051339dSHong Zhang           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
8919566063dSJacob Faibussowitsch           PetscCall(PetscLogFlops(1 + 2.0 * nz));
892137fb511SHong Zhang         }
893b051339dSHong Zhang         row = *ajtmp++;
894137fb511SHong Zhang       }
895b65878eeSJunchao Zhang       /* finished row so overwrite it onto aa */
896b65878eeSJunchao Zhang       pv     = aa + ai[r[i]];
897b051339dSHong Zhang       pj     = aj + ai[r[i]];
898b051339dSHong Zhang       nz     = ai[r[i] + 1] - ai[r[i]];
899b051339dSHong Zhang       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
900137fb511SHong Zhang 
901137fb511SHong Zhang       rs = 0.0;
902137fb511SHong Zhang       for (j = 0; j < nz; j++) {
903b051339dSHong Zhang         pv[j] = rtmp[pj[j]];
904b051339dSHong Zhang         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
905137fb511SHong Zhang       }
906137fb511SHong Zhang 
907137fb511SHong Zhang       sctx.rs = rs;
908b051339dSHong Zhang       sctx.pv = pv[nbdiag];
9099566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
91007b50cabSHong Zhang       if (sctx.newshift) break;
911504a3fadSHong Zhang       pv[nbdiag] = sctx.pv;
912137fb511SHong Zhang     }
913137fb511SHong Zhang 
91407b50cabSHong Zhang     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
915137fb511SHong Zhang       /*
916137fb511SHong Zhang        * if no shift in this attempt & shifting & started shifting & can refine,
917137fb511SHong Zhang        * then try lower shift
918137fb511SHong Zhang        */
9190481f469SBarry Smith       sctx.shift_hi       = sctx.shift_fraction;
9200481f469SBarry Smith       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
9210481f469SBarry Smith       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
92207b50cabSHong Zhang       sctx.newshift       = PETSC_TRUE;
923137fb511SHong Zhang       sctx.nshift++;
924137fb511SHong Zhang     }
92507b50cabSHong Zhang   } while (sctx.newshift);
926137fb511SHong Zhang 
927a5b23f4aSJose E. Roman   /* invert diagonal entries for simpler triangular solves */
928b65878eeSJunchao Zhang   for (i = 0; i < n; i++) aa[diag[r[i]]] = 1.0 / aa[diag[r[i]]];
929137fb511SHong Zhang 
930b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArray(A, &aa));
9319566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
9329566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
9339566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
9342205254eSKarl Rupp 
935b051339dSHong Zhang   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
936ad04f41aSHong Zhang   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
937ad04f41aSHong Zhang   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
938ad04f41aSHong Zhang   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
9392205254eSKarl Rupp 
940b051339dSHong Zhang   A->assembled    = PETSC_TRUE;
9415c9eb25fSBarry Smith   A->preallocated = PETSC_TRUE;
9422205254eSKarl Rupp 
9439566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(A->cmap->n));
944137fb511SHong Zhang   if (sctx.nshift) {
945f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
9469566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
947f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
9489566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
949137fb511SHong Zhang     }
950137fb511SHong Zhang   }
9513ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
952137fb511SHong Zhang }
953137fb511SHong Zhang 
954d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
955d71ae5a4SJacob Faibussowitsch {
956416022c9SBarry Smith   Mat C;
957416022c9SBarry Smith 
9583a40ed3dSBarry Smith   PetscFunctionBegin;
9599566063dSJacob Faibussowitsch   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
9609566063dSJacob Faibussowitsch   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
9619566063dSJacob Faibussowitsch   PetscCall(MatLUFactorNumeric(C, A, info));
9622205254eSKarl Rupp 
963db4efbfdSBarry Smith   A->ops->solve          = C->ops->solve;
964db4efbfdSBarry Smith   A->ops->solvetranspose = C->ops->solvetranspose;
9652205254eSKarl Rupp 
9669566063dSJacob Faibussowitsch   PetscCall(MatHeaderMerge(A, &C));
9673ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
968da3a660dSBarry Smith }
9691d098868SHong Zhang 
970d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
971d71ae5a4SJacob Faibussowitsch {
972416022c9SBarry Smith   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
973416022c9SBarry Smith   IS                 iscol = a->col, isrow = a->row;
9745d0c19d7SBarry Smith   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
9755d0c19d7SBarry Smith   PetscInt           nz;
9765d0c19d7SBarry Smith   const PetscInt    *rout, *cout, *r, *c;
977dd6ea824SBarry Smith   PetscScalar       *x, *tmp, *tmps, sum;
978d9fead3dSBarry Smith   const PetscScalar *b;
979b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
9808c37ef55SBarry Smith 
9813a40ed3dSBarry Smith   PetscFunctionBegin;
9823ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
98391bf9adeSBarry Smith 
984b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
9859566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
9869566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
987416022c9SBarry Smith   tmp = a->solve_work;
9888c37ef55SBarry Smith 
9899371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
9909371c9d4SSatish Balay   r = rout;
9919371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
9929371c9d4SSatish Balay   c = cout + (n - 1);
9938c37ef55SBarry Smith 
9948c37ef55SBarry Smith   /* forward solve the lower triangular */
9958c37ef55SBarry Smith   tmp[0] = b[*r++];
996010a20caSHong Zhang   tmps   = tmp;
9978c37ef55SBarry Smith   for (i = 1; i < n; i++) {
998010a20caSHong Zhang     v   = aa + ai[i];
999010a20caSHong Zhang     vi  = aj + ai[i];
100053e7425aSBarry Smith     nz  = a->diag[i] - ai[i];
100153e7425aSBarry Smith     sum = b[*r++];
1002003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
10038c37ef55SBarry Smith     tmp[i] = sum;
10048c37ef55SBarry Smith   }
10058c37ef55SBarry Smith 
10068c37ef55SBarry Smith   /* backward solve the upper triangular */
10078c37ef55SBarry Smith   for (i = n - 1; i >= 0; i--) {
1008010a20caSHong Zhang     v   = aa + a->diag[i] + 1;
1009010a20caSHong Zhang     vi  = aj + a->diag[i] + 1;
1010416022c9SBarry Smith     nz  = ai[i + 1] - a->diag[i] - 1;
10118c37ef55SBarry Smith     sum = tmp[i];
1012003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1013010a20caSHong Zhang     x[*c--] = tmp[i] = sum * aa[a->diag[i]];
10148c37ef55SBarry Smith   }
1015b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
10169566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
10179566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
10189566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
10199566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
10209566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
10213ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
10228c37ef55SBarry Smith }
1023026e39d0SSatish Balay 
1024ff6a9541SJacob Faibussowitsch static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
1025d71ae5a4SJacob Faibussowitsch {
10262bebee5dSHong Zhang   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
10272bebee5dSHong Zhang   IS                 iscol = a->col, isrow = a->row;
10285d0c19d7SBarry Smith   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1029910cf402Sprj-   PetscInt           nz, neq, ldb, ldx;
10305d0c19d7SBarry Smith   const PetscInt    *rout, *cout, *r, *c;
1031910cf402Sprj-   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
1032b65878eeSJunchao Zhang   const PetscScalar *b, *aa, *v;
1033910cf402Sprj-   PetscBool          isdense;
10342bebee5dSHong Zhang 
10352bebee5dSHong Zhang   PetscFunctionBegin;
10363ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
10379566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
103828b400f6SJacob Faibussowitsch   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1039f9fb9879SHong Zhang   if (X != B) {
10409566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
104128b400f6SJacob Faibussowitsch     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1042f9fb9879SHong Zhang   }
1043b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
10449566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(B, &b));
10459566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(B, &ldb));
10469566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(X, &x));
10479566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(X, &ldx));
10489371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
10499371c9d4SSatish Balay   r = rout;
10509371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
10519371c9d4SSatish Balay   c = cout;
1052a36b22ccSSatish Balay   for (neq = 0; neq < B->cmap->n; neq++) {
10532bebee5dSHong Zhang     /* forward solve the lower triangular */
10542bebee5dSHong Zhang     tmp[0] = b[r[0]];
10552bebee5dSHong Zhang     tmps   = tmp;
10562bebee5dSHong Zhang     for (i = 1; i < n; i++) {
10572bebee5dSHong Zhang       v   = aa + ai[i];
10582bebee5dSHong Zhang       vi  = aj + ai[i];
10592bebee5dSHong Zhang       nz  = a->diag[i] - ai[i];
10602bebee5dSHong Zhang       sum = b[r[i]];
1061003131ecSBarry Smith       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
10622bebee5dSHong Zhang       tmp[i] = sum;
10632bebee5dSHong Zhang     }
10642bebee5dSHong Zhang     /* backward solve the upper triangular */
10652bebee5dSHong Zhang     for (i = n - 1; i >= 0; i--) {
10662bebee5dSHong Zhang       v   = aa + a->diag[i] + 1;
10672bebee5dSHong Zhang       vi  = aj + a->diag[i] + 1;
10682bebee5dSHong Zhang       nz  = ai[i + 1] - a->diag[i] - 1;
10692bebee5dSHong Zhang       sum = tmp[i];
1070003131ecSBarry Smith       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
10712bebee5dSHong Zhang       x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
10722bebee5dSHong Zhang     }
1073910cf402Sprj-     b += ldb;
1074910cf402Sprj-     x += ldx;
10752bebee5dSHong Zhang   }
1076b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
10779566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
10789566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
10799566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(B, &b));
10809566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(X, &x));
10819566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
10823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
10832bebee5dSHong Zhang }
10842bebee5dSHong Zhang 
1085d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
1086d71ae5a4SJacob Faibussowitsch {
10879bd0847aSShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
10889bd0847aSShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
10899bd0847aSShri Abhyankar   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1090910cf402Sprj-   PetscInt           nz, neq, ldb, ldx;
10919bd0847aSShri Abhyankar   const PetscInt    *rout, *cout, *r, *c;
1092910cf402Sprj-   PetscScalar       *x, *tmp = a->solve_work, sum;
1093b65878eeSJunchao Zhang   const PetscScalar *b, *aa, *v;
1094910cf402Sprj-   PetscBool          isdense;
10959bd0847aSShri Abhyankar 
10969bd0847aSShri Abhyankar   PetscFunctionBegin;
10973ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
10989566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
109928b400f6SJacob Faibussowitsch   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1100f9fb9879SHong Zhang   if (X != B) {
11019566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
110228b400f6SJacob Faibussowitsch     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1103f9fb9879SHong Zhang   }
1104b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
11059566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(B, &b));
11069566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(B, &ldb));
11079566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(X, &x));
11089566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(X, &ldx));
11099371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
11109371c9d4SSatish Balay   r = rout;
11119371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
11129371c9d4SSatish Balay   c = cout;
11139bd0847aSShri Abhyankar   for (neq = 0; neq < B->cmap->n; neq++) {
11149bd0847aSShri Abhyankar     /* forward solve the lower triangular */
11159bd0847aSShri Abhyankar     tmp[0] = b[r[0]];
11169bd0847aSShri Abhyankar     v      = aa;
11179bd0847aSShri Abhyankar     vi     = aj;
11189bd0847aSShri Abhyankar     for (i = 1; i < n; i++) {
11199bd0847aSShri Abhyankar       nz  = ai[i + 1] - ai[i];
11209bd0847aSShri Abhyankar       sum = b[r[i]];
11219bd0847aSShri Abhyankar       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
11229bd0847aSShri Abhyankar       tmp[i] = sum;
11239371c9d4SSatish Balay       v += nz;
11249371c9d4SSatish Balay       vi += nz;
11259bd0847aSShri Abhyankar     }
11269bd0847aSShri Abhyankar     /* backward solve the upper triangular */
11279bd0847aSShri Abhyankar     for (i = n - 1; i >= 0; i--) {
11289bd0847aSShri Abhyankar       v   = aa + adiag[i + 1] + 1;
11299bd0847aSShri Abhyankar       vi  = aj + adiag[i + 1] + 1;
11309bd0847aSShri Abhyankar       nz  = adiag[i] - adiag[i + 1] - 1;
11319bd0847aSShri Abhyankar       sum = tmp[i];
11329bd0847aSShri Abhyankar       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
11339bd0847aSShri Abhyankar       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
11349bd0847aSShri Abhyankar     }
1135910cf402Sprj-     b += ldb;
1136910cf402Sprj-     x += ldx;
11379bd0847aSShri Abhyankar   }
1138b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
11399566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
11409566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
11419566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(B, &b));
11429566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(X, &x));
11439566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
11443ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
11459bd0847aSShri Abhyankar }
11469bd0847aSShri Abhyankar 
1147a3d9026eSPierre Jolivet PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X)
1148a3d9026eSPierre Jolivet {
1149a3d9026eSPierre Jolivet   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1150a3d9026eSPierre Jolivet   IS                 iscol = a->col, isrow = a->row;
1151a3d9026eSPierre Jolivet   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, j;
1152a3d9026eSPierre Jolivet   PetscInt           nz, neq, ldb, ldx;
1153a3d9026eSPierre Jolivet   const PetscInt    *rout, *cout, *r, *c;
1154a3d9026eSPierre Jolivet   PetscScalar       *x, *tmp = a->solve_work, s1;
1155b65878eeSJunchao Zhang   const PetscScalar *b, *aa, *v;
1156a3d9026eSPierre Jolivet   PetscBool          isdense;
1157a3d9026eSPierre Jolivet 
1158a3d9026eSPierre Jolivet   PetscFunctionBegin;
1159a3d9026eSPierre Jolivet   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1160a3d9026eSPierre Jolivet   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1161a3d9026eSPierre Jolivet   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1162a3d9026eSPierre Jolivet   if (X != B) {
1163a3d9026eSPierre Jolivet     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1164a3d9026eSPierre Jolivet     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1165a3d9026eSPierre Jolivet   }
1166b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1167a3d9026eSPierre Jolivet   PetscCall(MatDenseGetArrayRead(B, &b));
1168a3d9026eSPierre Jolivet   PetscCall(MatDenseGetLDA(B, &ldb));
1169a3d9026eSPierre Jolivet   PetscCall(MatDenseGetArray(X, &x));
1170a3d9026eSPierre Jolivet   PetscCall(MatDenseGetLDA(X, &ldx));
1171a3d9026eSPierre Jolivet   PetscCall(ISGetIndices(isrow, &rout));
1172a3d9026eSPierre Jolivet   r = rout;
1173a3d9026eSPierre Jolivet   PetscCall(ISGetIndices(iscol, &cout));
1174a3d9026eSPierre Jolivet   c = cout;
1175a3d9026eSPierre Jolivet   for (neq = 0; neq < B->cmap->n; neq++) {
1176a3d9026eSPierre Jolivet     /* copy the b into temp work space according to permutation */
1177a3d9026eSPierre Jolivet     for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1178a3d9026eSPierre Jolivet 
1179a3d9026eSPierre Jolivet     /* forward solve the U^T */
1180a3d9026eSPierre Jolivet     for (i = 0; i < n; i++) {
1181a3d9026eSPierre Jolivet       v  = aa + adiag[i + 1] + 1;
1182a3d9026eSPierre Jolivet       vi = aj + adiag[i + 1] + 1;
1183a3d9026eSPierre Jolivet       nz = adiag[i] - adiag[i + 1] - 1;
1184a3d9026eSPierre Jolivet       s1 = tmp[i];
1185a3d9026eSPierre Jolivet       s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1186a3d9026eSPierre Jolivet       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1187a3d9026eSPierre Jolivet       tmp[i] = s1;
1188a3d9026eSPierre Jolivet     }
1189a3d9026eSPierre Jolivet 
1190a3d9026eSPierre Jolivet     /* backward solve the L^T */
1191a3d9026eSPierre Jolivet     for (i = n - 1; i >= 0; i--) {
1192a3d9026eSPierre Jolivet       v  = aa + ai[i];
1193a3d9026eSPierre Jolivet       vi = aj + ai[i];
1194a3d9026eSPierre Jolivet       nz = ai[i + 1] - ai[i];
1195a3d9026eSPierre Jolivet       s1 = tmp[i];
1196a3d9026eSPierre Jolivet       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1197a3d9026eSPierre Jolivet     }
1198a3d9026eSPierre Jolivet 
1199a3d9026eSPierre Jolivet     /* copy tmp into x according to permutation */
1200a3d9026eSPierre Jolivet     for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1201a3d9026eSPierre Jolivet     b += ldb;
1202a3d9026eSPierre Jolivet     x += ldx;
1203a3d9026eSPierre Jolivet   }
1204b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1205a3d9026eSPierre Jolivet   PetscCall(ISRestoreIndices(isrow, &rout));
1206a3d9026eSPierre Jolivet   PetscCall(ISRestoreIndices(iscol, &cout));
1207a3d9026eSPierre Jolivet   PetscCall(MatDenseRestoreArrayRead(B, &b));
1208a3d9026eSPierre Jolivet   PetscCall(MatDenseRestoreArray(X, &x));
1209a3d9026eSPierre Jolivet   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1210a3d9026eSPierre Jolivet   PetscFunctionReturn(PETSC_SUCCESS);
1211a3d9026eSPierre Jolivet }
1212a3d9026eSPierre Jolivet 
1213ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
1214d71ae5a4SJacob Faibussowitsch {
1215137fb511SHong Zhang   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1216137fb511SHong Zhang   IS                 iscol = a->col, isrow = a->row;
12175d0c19d7SBarry Smith   const PetscInt    *r, *c, *rout, *cout;
12185d0c19d7SBarry Smith   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
12195d0c19d7SBarry Smith   PetscInt           nz, row;
1220fdc842d1SBarry Smith   PetscScalar       *x, *tmp, *tmps, sum;
1221fdc842d1SBarry Smith   const PetscScalar *b;
1222b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
1223137fb511SHong Zhang 
1224137fb511SHong Zhang   PetscFunctionBegin;
12253ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1226137fb511SHong Zhang 
1227b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
12289566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
12299566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
1230137fb511SHong Zhang   tmp = a->solve_work;
1231137fb511SHong Zhang 
12329371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
12339371c9d4SSatish Balay   r = rout;
12349371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
12359371c9d4SSatish Balay   c = cout + (n - 1);
1236137fb511SHong Zhang 
1237137fb511SHong Zhang   /* forward solve the lower triangular */
1238137fb511SHong Zhang   tmp[0] = b[*r++];
1239137fb511SHong Zhang   tmps   = tmp;
1240137fb511SHong Zhang   for (row = 1; row < n; row++) {
1241137fb511SHong Zhang     i   = rout[row]; /* permuted row */
1242137fb511SHong Zhang     v   = aa + ai[i];
1243137fb511SHong Zhang     vi  = aj + ai[i];
1244137fb511SHong Zhang     nz  = a->diag[i] - ai[i];
1245137fb511SHong Zhang     sum = b[*r++];
1246003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1247137fb511SHong Zhang     tmp[row] = sum;
1248137fb511SHong Zhang   }
1249137fb511SHong Zhang 
1250137fb511SHong Zhang   /* backward solve the upper triangular */
1251137fb511SHong Zhang   for (row = n - 1; row >= 0; row--) {
1252137fb511SHong Zhang     i   = rout[row]; /* permuted row */
1253137fb511SHong Zhang     v   = aa + a->diag[i] + 1;
1254137fb511SHong Zhang     vi  = aj + a->diag[i] + 1;
1255137fb511SHong Zhang     nz  = ai[i + 1] - a->diag[i] - 1;
1256137fb511SHong Zhang     sum = tmp[row];
1257003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1258137fb511SHong Zhang     x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1259137fb511SHong Zhang   }
1260b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
12619566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
12629566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
12639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
12649566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
12659566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
12663ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1267137fb511SHong Zhang }
1268137fb511SHong Zhang 
1269c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1270ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1271d71ae5a4SJacob Faibussowitsch {
1272930ae53cSBarry Smith   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1273d0f46423SBarry Smith   PetscInt           n  = A->rmap->n;
1274b377110cSBarry Smith   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag;
1275356650c2SBarry Smith   PetscScalar       *x;
1276356650c2SBarry Smith   const PetscScalar *b;
1277b65878eeSJunchao Zhang   const MatScalar   *aa;
1278aa482453SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1279356650c2SBarry Smith   PetscInt         adiag_i, i, nz, ai_i;
1280b377110cSBarry Smith   const PetscInt  *vi;
1281dd6ea824SBarry Smith   const MatScalar *v;
1282dd6ea824SBarry Smith   PetscScalar      sum;
1283d85afc42SSatish Balay #endif
1284930ae53cSBarry Smith 
12853a40ed3dSBarry Smith   PetscFunctionBegin;
12863ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1287930ae53cSBarry Smith 
1288b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
12899566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
12909566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
1291930ae53cSBarry Smith 
1292aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
12931c4feecaSSatish Balay   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
12941c4feecaSSatish Balay #else
1295930ae53cSBarry Smith   /* forward solve the lower triangular */
1296930ae53cSBarry Smith   x[0] = b[0];
1297930ae53cSBarry Smith   for (i = 1; i < n; i++) {
1298930ae53cSBarry Smith     ai_i = ai[i];
1299930ae53cSBarry Smith     v    = aa + ai_i;
1300930ae53cSBarry Smith     vi   = aj + ai_i;
1301930ae53cSBarry Smith     nz   = adiag[i] - ai_i;
1302930ae53cSBarry Smith     sum  = b[i];
1303003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1304930ae53cSBarry Smith     x[i] = sum;
1305930ae53cSBarry Smith   }
1306930ae53cSBarry Smith 
1307930ae53cSBarry Smith   /* backward solve the upper triangular */
1308930ae53cSBarry Smith   for (i = n - 1; i >= 0; i--) {
1309930ae53cSBarry Smith     adiag_i = adiag[i];
1310d708749eSBarry Smith     v       = aa + adiag_i + 1;
1311d708749eSBarry Smith     vi      = aj + adiag_i + 1;
1312930ae53cSBarry Smith     nz      = ai[i + 1] - adiag_i - 1;
1313930ae53cSBarry Smith     sum     = x[i];
1314003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1315930ae53cSBarry Smith     x[i] = sum * aa[adiag_i];
1316930ae53cSBarry Smith   }
13171c4feecaSSatish Balay #endif
13189566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1319b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
13209566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
13219566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
13223ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1323930ae53cSBarry Smith }
1324930ae53cSBarry Smith 
1325ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1326d71ae5a4SJacob Faibussowitsch {
1327416022c9SBarry Smith   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1328416022c9SBarry Smith   IS                 iscol = a->col, isrow = a->row;
132904fbf559SBarry Smith   PetscInt           i, n                  = A->rmap->n, j;
13305d0c19d7SBarry Smith   PetscInt           nz;
133104fbf559SBarry Smith   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
133204fbf559SBarry Smith   PetscScalar       *x, *tmp, sum;
133304fbf559SBarry Smith   const PetscScalar *b;
1334b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
1335da3a660dSBarry Smith 
13363a40ed3dSBarry Smith   PetscFunctionBegin;
13379566063dSJacob Faibussowitsch   if (yy != xx) PetscCall(VecCopy(yy, xx));
1338da3a660dSBarry Smith 
1339b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
13409566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
13419566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
1342416022c9SBarry Smith   tmp = a->solve_work;
1343da3a660dSBarry Smith 
13449371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
13459371c9d4SSatish Balay   r = rout;
13469371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
13479371c9d4SSatish Balay   c = cout + (n - 1);
1348da3a660dSBarry Smith 
1349da3a660dSBarry Smith   /* forward solve the lower triangular */
1350da3a660dSBarry Smith   tmp[0] = b[*r++];
1351da3a660dSBarry Smith   for (i = 1; i < n; i++) {
1352010a20caSHong Zhang     v   = aa + ai[i];
1353010a20caSHong Zhang     vi  = aj + ai[i];
1354416022c9SBarry Smith     nz  = a->diag[i] - ai[i];
1355da3a660dSBarry Smith     sum = b[*r++];
135604fbf559SBarry Smith     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1357da3a660dSBarry Smith     tmp[i] = sum;
1358da3a660dSBarry Smith   }
1359da3a660dSBarry Smith 
1360da3a660dSBarry Smith   /* backward solve the upper triangular */
1361da3a660dSBarry Smith   for (i = n - 1; i >= 0; i--) {
1362010a20caSHong Zhang     v   = aa + a->diag[i] + 1;
1363010a20caSHong Zhang     vi  = aj + a->diag[i] + 1;
1364416022c9SBarry Smith     nz  = ai[i + 1] - a->diag[i] - 1;
1365da3a660dSBarry Smith     sum = tmp[i];
136604fbf559SBarry Smith     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1367010a20caSHong Zhang     tmp[i] = sum * aa[a->diag[i]];
1368da3a660dSBarry Smith     x[*c--] += tmp[i];
1369da3a660dSBarry Smith   }
1370da3a660dSBarry Smith 
1371b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
13729566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
13739566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
13749566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
13759566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
13769566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
13773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1378da3a660dSBarry Smith }
137904fbf559SBarry Smith 
1380d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1381d71ae5a4SJacob Faibussowitsch {
13823c0119dfSShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
13833c0119dfSShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
13843c0119dfSShri Abhyankar   PetscInt           i, n                  = A->rmap->n, j;
13853c0119dfSShri Abhyankar   PetscInt           nz;
13863c0119dfSShri Abhyankar   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
13873c0119dfSShri Abhyankar   PetscScalar       *x, *tmp, sum;
13883c0119dfSShri Abhyankar   const PetscScalar *b;
1389b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
13903c0119dfSShri Abhyankar 
13913c0119dfSShri Abhyankar   PetscFunctionBegin;
13929566063dSJacob Faibussowitsch   if (yy != xx) PetscCall(VecCopy(yy, xx));
13933c0119dfSShri Abhyankar 
1394b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
13959566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
13969566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
13973c0119dfSShri Abhyankar   tmp = a->solve_work;
13983c0119dfSShri Abhyankar 
13999371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
14009371c9d4SSatish Balay   r = rout;
14019371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
14029371c9d4SSatish Balay   c = cout;
14033c0119dfSShri Abhyankar 
14043c0119dfSShri Abhyankar   /* forward solve the lower triangular */
14053c0119dfSShri Abhyankar   tmp[0] = b[r[0]];
14063c0119dfSShri Abhyankar   v      = aa;
14073c0119dfSShri Abhyankar   vi     = aj;
14083c0119dfSShri Abhyankar   for (i = 1; i < n; i++) {
14093c0119dfSShri Abhyankar     nz  = ai[i + 1] - ai[i];
14103c0119dfSShri Abhyankar     sum = b[r[i]];
14113c0119dfSShri Abhyankar     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
14123c0119dfSShri Abhyankar     tmp[i] = sum;
14132205254eSKarl Rupp     v += nz;
14142205254eSKarl Rupp     vi += nz;
14153c0119dfSShri Abhyankar   }
14163c0119dfSShri Abhyankar 
14173c0119dfSShri Abhyankar   /* backward solve the upper triangular */
14183c0119dfSShri Abhyankar   v  = aa + adiag[n - 1];
14193c0119dfSShri Abhyankar   vi = aj + adiag[n - 1];
14203c0119dfSShri Abhyankar   for (i = n - 1; i >= 0; i--) {
14213c0119dfSShri Abhyankar     nz  = adiag[i] - adiag[i + 1] - 1;
14223c0119dfSShri Abhyankar     sum = tmp[i];
14233c0119dfSShri Abhyankar     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
14243c0119dfSShri Abhyankar     tmp[i] = sum * v[nz];
14253c0119dfSShri Abhyankar     x[c[i]] += tmp[i];
14269371c9d4SSatish Balay     v += nz + 1;
14279371c9d4SSatish Balay     vi += nz + 1;
14283c0119dfSShri Abhyankar   }
14293c0119dfSShri Abhyankar 
1430b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
14319566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
14329566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
14339566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
14349566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
14359566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
14363ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14373c0119dfSShri Abhyankar }
14383c0119dfSShri Abhyankar 
1439d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1440d71ae5a4SJacob Faibussowitsch {
1441416022c9SBarry Smith   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
14422235e667SBarry Smith   IS                 iscol = a->col, isrow = a->row;
144304fbf559SBarry Smith   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
144404fbf559SBarry Smith   PetscInt           i, n = A->rmap->n, j;
144504fbf559SBarry Smith   PetscInt           nz;
144604fbf559SBarry Smith   PetscScalar       *x, *tmp, s1;
1447b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
144804fbf559SBarry Smith   const PetscScalar *b;
1449da3a660dSBarry Smith 
14503a40ed3dSBarry Smith   PetscFunctionBegin;
1451b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
14529566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
14539566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
1454416022c9SBarry Smith   tmp = a->solve_work;
1455da3a660dSBarry Smith 
14569371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
14579371c9d4SSatish Balay   r = rout;
14589371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
14599371c9d4SSatish Balay   c = cout;
1460da3a660dSBarry Smith 
1461da3a660dSBarry Smith   /* copy the b into temp work space according to permutation */
14622235e667SBarry Smith   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1463da3a660dSBarry Smith 
1464da3a660dSBarry Smith   /* forward solve the U^T */
1465da3a660dSBarry Smith   for (i = 0; i < n; i++) {
1466010a20caSHong Zhang     v  = aa + diag[i];
1467010a20caSHong Zhang     vi = aj + diag[i] + 1;
1468f1af5d2fSBarry Smith     nz = ai[i + 1] - diag[i] - 1;
1469c38d4ed2SBarry Smith     s1 = tmp[i];
1470ef66eb69SBarry Smith     s1 *= (*v++); /* multiply by inverse of diagonal entry */
147104fbf559SBarry Smith     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1472c38d4ed2SBarry Smith     tmp[i] = s1;
1473da3a660dSBarry Smith   }
1474da3a660dSBarry Smith 
1475da3a660dSBarry Smith   /* backward solve the L^T */
1476da3a660dSBarry Smith   for (i = n - 1; i >= 0; i--) {
1477010a20caSHong Zhang     v  = aa + diag[i] - 1;
1478010a20caSHong Zhang     vi = aj + diag[i] - 1;
1479f1af5d2fSBarry Smith     nz = diag[i] - ai[i];
1480f1af5d2fSBarry Smith     s1 = tmp[i];
148104fbf559SBarry Smith     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1482da3a660dSBarry Smith   }
1483da3a660dSBarry Smith 
1484da3a660dSBarry Smith   /* copy tmp into x according to permutation */
1485591294e4SBarry Smith   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1486da3a660dSBarry Smith 
14879566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
14889566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
1489b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
14909566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
14919566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
1492da3a660dSBarry Smith 
14939566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
14943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1495da3a660dSBarry Smith }
1496da3a660dSBarry Smith 
1497d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1498d71ae5a4SJacob Faibussowitsch {
1499d1fa9404SShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1500d1fa9404SShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
1501d1fa9404SShri Abhyankar   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1502d1fa9404SShri Abhyankar   PetscInt           i, n = A->rmap->n, j;
1503d1fa9404SShri Abhyankar   PetscInt           nz;
1504d1fa9404SShri Abhyankar   PetscScalar       *x, *tmp, s1;
1505b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
1506d1fa9404SShri Abhyankar   const PetscScalar *b;
1507d1fa9404SShri Abhyankar 
1508d1fa9404SShri Abhyankar   PetscFunctionBegin;
1509b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
15109566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
15119566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
1512d1fa9404SShri Abhyankar   tmp = a->solve_work;
1513d1fa9404SShri Abhyankar 
15149371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
15159371c9d4SSatish Balay   r = rout;
15169371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
15179371c9d4SSatish Balay   c = cout;
1518d1fa9404SShri Abhyankar 
1519d1fa9404SShri Abhyankar   /* copy the b into temp work space according to permutation */
1520d1fa9404SShri Abhyankar   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1521d1fa9404SShri Abhyankar 
1522d1fa9404SShri Abhyankar   /* forward solve the U^T */
1523d1fa9404SShri Abhyankar   for (i = 0; i < n; i++) {
1524d1fa9404SShri Abhyankar     v  = aa + adiag[i + 1] + 1;
1525d1fa9404SShri Abhyankar     vi = aj + adiag[i + 1] + 1;
1526d1fa9404SShri Abhyankar     nz = adiag[i] - adiag[i + 1] - 1;
1527d1fa9404SShri Abhyankar     s1 = tmp[i];
1528d1fa9404SShri Abhyankar     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1529d1fa9404SShri Abhyankar     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1530d1fa9404SShri Abhyankar     tmp[i] = s1;
1531d1fa9404SShri Abhyankar   }
1532d1fa9404SShri Abhyankar 
1533d1fa9404SShri Abhyankar   /* backward solve the L^T */
1534d1fa9404SShri Abhyankar   for (i = n - 1; i >= 0; i--) {
1535d1fa9404SShri Abhyankar     v  = aa + ai[i];
1536d1fa9404SShri Abhyankar     vi = aj + ai[i];
1537d1fa9404SShri Abhyankar     nz = ai[i + 1] - ai[i];
1538d1fa9404SShri Abhyankar     s1 = tmp[i];
1539d1fa9404SShri Abhyankar     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1540d1fa9404SShri Abhyankar   }
1541d1fa9404SShri Abhyankar 
1542d1fa9404SShri Abhyankar   /* copy tmp into x according to permutation */
1543d1fa9404SShri Abhyankar   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1544d1fa9404SShri Abhyankar 
15459566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
15469566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
1547b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
15489566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
15499566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
1550d1fa9404SShri Abhyankar 
15519566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
15523ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1553d1fa9404SShri Abhyankar }
1554d1fa9404SShri Abhyankar 
1555d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1556d71ae5a4SJacob Faibussowitsch {
1557416022c9SBarry Smith   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
15582235e667SBarry Smith   IS                 iscol = a->col, isrow = a->row;
155904fbf559SBarry Smith   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
156004fbf559SBarry Smith   PetscInt           i, n = A->rmap->n, j;
156104fbf559SBarry Smith   PetscInt           nz;
156204fbf559SBarry Smith   PetscScalar       *x, *tmp, s1;
1563b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
156404fbf559SBarry Smith   const PetscScalar *b;
15656abc6512SBarry Smith 
15663a40ed3dSBarry Smith   PetscFunctionBegin;
15679566063dSJacob Faibussowitsch   if (zz != xx) PetscCall(VecCopy(zz, xx));
1568b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
15699566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
15709566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
1571416022c9SBarry Smith   tmp = a->solve_work;
15726abc6512SBarry Smith 
15739371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
15749371c9d4SSatish Balay   r = rout;
15759371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
15769371c9d4SSatish Balay   c = cout;
15776abc6512SBarry Smith 
15786abc6512SBarry Smith   /* copy the b into temp work space according to permutation */
15792235e667SBarry Smith   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
15806abc6512SBarry Smith 
15816abc6512SBarry Smith   /* forward solve the U^T */
15826abc6512SBarry Smith   for (i = 0; i < n; i++) {
1583010a20caSHong Zhang     v  = aa + diag[i];
1584010a20caSHong Zhang     vi = aj + diag[i] + 1;
1585f1af5d2fSBarry Smith     nz = ai[i + 1] - diag[i] - 1;
158604fbf559SBarry Smith     s1 = tmp[i];
158704fbf559SBarry Smith     s1 *= (*v++); /* multiply by inverse of diagonal entry */
158804fbf559SBarry Smith     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
158904fbf559SBarry Smith     tmp[i] = s1;
15906abc6512SBarry Smith   }
15916abc6512SBarry Smith 
15926abc6512SBarry Smith   /* backward solve the L^T */
15936abc6512SBarry Smith   for (i = n - 1; i >= 0; i--) {
1594010a20caSHong Zhang     v  = aa + diag[i] - 1;
1595010a20caSHong Zhang     vi = aj + diag[i] - 1;
1596f1af5d2fSBarry Smith     nz = diag[i] - ai[i];
159704fbf559SBarry Smith     s1 = tmp[i];
159804fbf559SBarry Smith     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
15996abc6512SBarry Smith   }
16006abc6512SBarry Smith 
16016abc6512SBarry Smith   /* copy tmp into x according to permutation */
16026abc6512SBarry Smith   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
16036abc6512SBarry Smith 
16049566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
16059566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
1606b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
16079566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
16089566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
16096abc6512SBarry Smith 
16109566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
16113ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1612da3a660dSBarry Smith }
161304fbf559SBarry Smith 
1614d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1615d71ae5a4SJacob Faibussowitsch {
1616533e6011SShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1617533e6011SShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
1618533e6011SShri Abhyankar   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1619533e6011SShri Abhyankar   PetscInt           i, n = A->rmap->n, j;
1620533e6011SShri Abhyankar   PetscInt           nz;
1621533e6011SShri Abhyankar   PetscScalar       *x, *tmp, s1;
1622b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
1623533e6011SShri Abhyankar   const PetscScalar *b;
1624533e6011SShri Abhyankar 
1625533e6011SShri Abhyankar   PetscFunctionBegin;
16269566063dSJacob Faibussowitsch   if (zz != xx) PetscCall(VecCopy(zz, xx));
1627b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
16289566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
16299566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
1630533e6011SShri Abhyankar   tmp = a->solve_work;
1631533e6011SShri Abhyankar 
16329371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
16339371c9d4SSatish Balay   r = rout;
16349371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
16359371c9d4SSatish Balay   c = cout;
1636533e6011SShri Abhyankar 
1637533e6011SShri Abhyankar   /* copy the b into temp work space according to permutation */
1638533e6011SShri Abhyankar   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1639533e6011SShri Abhyankar 
1640533e6011SShri Abhyankar   /* forward solve the U^T */
1641533e6011SShri Abhyankar   for (i = 0; i < n; i++) {
1642533e6011SShri Abhyankar     v  = aa + adiag[i + 1] + 1;
1643533e6011SShri Abhyankar     vi = aj + adiag[i + 1] + 1;
1644533e6011SShri Abhyankar     nz = adiag[i] - adiag[i + 1] - 1;
1645533e6011SShri Abhyankar     s1 = tmp[i];
1646533e6011SShri Abhyankar     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1647533e6011SShri Abhyankar     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1648533e6011SShri Abhyankar     tmp[i] = s1;
1649533e6011SShri Abhyankar   }
1650533e6011SShri Abhyankar 
1651533e6011SShri Abhyankar   /* backward solve the L^T */
1652533e6011SShri Abhyankar   for (i = n - 1; i >= 0; i--) {
1653533e6011SShri Abhyankar     v  = aa + ai[i];
1654533e6011SShri Abhyankar     vi = aj + ai[i];
1655533e6011SShri Abhyankar     nz = ai[i + 1] - ai[i];
1656533e6011SShri Abhyankar     s1 = tmp[i];
1657c5e3b2a3SShri Abhyankar     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1658533e6011SShri Abhyankar   }
1659533e6011SShri Abhyankar 
1660533e6011SShri Abhyankar   /* copy tmp into x according to permutation */
1661533e6011SShri Abhyankar   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1662533e6011SShri Abhyankar 
16639566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
16649566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
1665b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
16669566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
16679566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
1668533e6011SShri Abhyankar 
16699566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
16703ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1671533e6011SShri Abhyankar }
1672533e6011SShri Abhyankar 
16738fc3a347SHong Zhang /*
16748a76ed4fSHong Zhang    ilu() under revised new data structure.
1675813a1f2bSShri Abhyankar    Factored arrays bj and ba are stored as
1676813a1f2bSShri Abhyankar      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1677813a1f2bSShri Abhyankar 
1678813a1f2bSShri Abhyankar    bi=fact->i is an array of size n+1, in which
1679baabb860SHong Zhang      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1680baabb860SHong Zhang      bi[n]:  points to L(n-1,n-1)+1
1681baabb860SHong Zhang 
1682813a1f2bSShri Abhyankar   bdiag=fact->diag is an array of size n+1,in which
1683baabb860SHong Zhang      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1684a24f213cSHong Zhang      bdiag[n]: points to entry of U(n-1,0)-1
1685813a1f2bSShri Abhyankar 
1686813a1f2bSShri Abhyankar    U(i,:) contains bdiag[i] as its last entry, i.e.,
1687813a1f2bSShri Abhyankar     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1688813a1f2bSShri Abhyankar */
1689d71ae5a4SJacob Faibussowitsch PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1690d71ae5a4SJacob Faibussowitsch {
1691813a1f2bSShri Abhyankar   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1692bbd65245SShri Abhyankar   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1693bbd65245SShri Abhyankar   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
16941df811f5SHong Zhang   IS             isicol;
1695813a1f2bSShri Abhyankar 
1696813a1f2bSShri Abhyankar   PetscFunctionBegin;
16979566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
16989566063dSJacob Faibussowitsch   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
169957508eceSPierre Jolivet   b = (Mat_SeqAIJ *)fact->data;
1700813a1f2bSShri Abhyankar 
1701813a1f2bSShri Abhyankar   /* allocate matrix arrays for new data structure */
1702*84648c2dSPierre Jolivet   PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscScalar), (void **)&b->a));
1703*84648c2dSPierre Jolivet   PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscInt), (void **)&b->j));
17049f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i));
17059f0612e4SBarry Smith   if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
17069f0612e4SBarry Smith   b->free_a  = PETSC_TRUE;
17079f0612e4SBarry Smith   b->free_ij = PETSC_TRUE;
17082205254eSKarl Rupp 
1709aa624791SPierre Jolivet   if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1710813a1f2bSShri Abhyankar   bdiag = b->diag;
1711813a1f2bSShri Abhyankar 
1712813a1f2bSShri Abhyankar   /* set bi and bj with new data structure */
1713813a1f2bSShri Abhyankar   bi = b->i;
1714813a1f2bSShri Abhyankar   bj = b->j;
1715813a1f2bSShri Abhyankar 
1716813a1f2bSShri Abhyankar   /* L part */
1717e60cf9a0SBarry Smith   bi[0] = 0;
1718813a1f2bSShri Abhyankar   for (i = 0; i < n; i++) {
1719813a1f2bSShri Abhyankar     nz        = adiag[i] - ai[i];
1720813a1f2bSShri Abhyankar     bi[i + 1] = bi[i] + nz;
1721813a1f2bSShri Abhyankar     aj        = a->j + ai[i];
1722ad540459SPierre Jolivet     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1723813a1f2bSShri Abhyankar   }
1724813a1f2bSShri Abhyankar 
1725813a1f2bSShri Abhyankar   /* U part */
172662fbd6afSShri Abhyankar   bdiag[n] = bi[n] - 1;
1727813a1f2bSShri Abhyankar   for (i = n - 1; i >= 0; i--) {
1728813a1f2bSShri Abhyankar     nz = ai[i + 1] - adiag[i] - 1;
1729813a1f2bSShri Abhyankar     aj = a->j + adiag[i] + 1;
1730ad540459SPierre Jolivet     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1731813a1f2bSShri Abhyankar     /* diag[i] */
1732bbd65245SShri Abhyankar     bj[k++]  = i;
1733a24f213cSHong Zhang     bdiag[i] = bdiag[i + 1] + nz + 1;
1734813a1f2bSShri Abhyankar   }
17351df811f5SHong Zhang 
1736d5f3da31SBarry Smith   fact->factortype             = MAT_FACTOR_ILU;
17371df811f5SHong Zhang   fact->info.factor_mallocs    = 0;
17381df811f5SHong Zhang   fact->info.fill_ratio_given  = info->fill;
17391df811f5SHong Zhang   fact->info.fill_ratio_needed = 1.0;
174035233d99SShri Abhyankar   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
17419566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
17421df811f5SHong Zhang 
174357508eceSPierre Jolivet   b       = (Mat_SeqAIJ *)fact->data;
17441df811f5SHong Zhang   b->row  = isrow;
17451df811f5SHong Zhang   b->col  = iscol;
17461df811f5SHong Zhang   b->icol = isicol;
1747*84648c2dSPierre Jolivet   PetscCall(PetscMalloc1(fact->rmap->n, &b->solve_work));
17489566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
17499566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
17503ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1751813a1f2bSShri Abhyankar }
1752813a1f2bSShri Abhyankar 
1753d71ae5a4SJacob Faibussowitsch PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1754d71ae5a4SJacob Faibussowitsch {
1755813a1f2bSShri Abhyankar   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1756813a1f2bSShri Abhyankar   IS                 isicol;
1757813a1f2bSShri Abhyankar   const PetscInt    *r, *ic;
17581df811f5SHong Zhang   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1759813a1f2bSShri Abhyankar   PetscInt          *bi, *cols, nnz, *cols_lvl;
1760813a1f2bSShri Abhyankar   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1761813a1f2bSShri Abhyankar   PetscInt           i, levels, diagonal_fill;
1762035f4963SHong Zhang   PetscBool          col_identity, row_identity, missing;
1763813a1f2bSShri Abhyankar   PetscReal          f;
17640298fd71SBarry Smith   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1765813a1f2bSShri Abhyankar   PetscBT            lnkbt;
1766813a1f2bSShri Abhyankar   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
17670298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
17680298fd71SBarry Smith   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1769813a1f2bSShri Abhyankar 
1770813a1f2bSShri Abhyankar   PetscFunctionBegin;
177108401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
17729566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
177328b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1774ad04f41aSHong Zhang 
1775813a1f2bSShri Abhyankar   levels = (PetscInt)info->levels;
17769566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
17779566063dSJacob Faibussowitsch   PetscCall(ISIdentity(iscol, &col_identity));
1778813a1f2bSShri Abhyankar   if (!levels && row_identity && col_identity) {
1779813a1f2bSShri Abhyankar     /* special case: ilu(0) with natural ordering */
17809566063dSJacob Faibussowitsch     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
17814d12350bSJunchao Zhang     if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
17823ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
1783813a1f2bSShri Abhyankar   }
1784813a1f2bSShri Abhyankar 
17859566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
17869566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
17879566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
1788813a1f2bSShri Abhyankar 
17891bfa06eaSJed Brown   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
17909f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
17919566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));
1792e60cf9a0SBarry Smith   bi[0] = bdiag[0] = 0;
17939566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1794813a1f2bSShri Abhyankar 
1795813a1f2bSShri Abhyankar   /* create a linked list for storing column indices of the active row */
1796813a1f2bSShri Abhyankar   nlnk = n + 1;
17979566063dSJacob Faibussowitsch   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1798813a1f2bSShri Abhyankar 
1799813a1f2bSShri Abhyankar   /* initial FreeSpace size is f*(ai[n]+1) */
18001df811f5SHong Zhang   f             = info->fill;
18011df811f5SHong Zhang   diagonal_fill = (PetscInt)info->diagonal_fill;
18029566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1803813a1f2bSShri Abhyankar   current_space = free_space;
18049566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1805813a1f2bSShri Abhyankar   current_space_lvl = free_space_lvl;
1806813a1f2bSShri Abhyankar   for (i = 0; i < n; i++) {
1807813a1f2bSShri Abhyankar     nzi = 0;
1808813a1f2bSShri Abhyankar     /* copy current row into linked list */
1809813a1f2bSShri Abhyankar     nnz = ai[r[i] + 1] - ai[r[i]];
181028b400f6SJacob Faibussowitsch     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1811813a1f2bSShri Abhyankar     cols   = aj + ai[r[i]];
1812813a1f2bSShri Abhyankar     lnk[i] = -1; /* marker to indicate if diagonal exists */
18139566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1814813a1f2bSShri Abhyankar     nzi += nlnk;
1815813a1f2bSShri Abhyankar 
1816813a1f2bSShri Abhyankar     /* make sure diagonal entry is included */
1817813a1f2bSShri Abhyankar     if (diagonal_fill && lnk[i] == -1) {
1818813a1f2bSShri Abhyankar       fm = n;
1819813a1f2bSShri Abhyankar       while (lnk[fm] < i) fm = lnk[fm];
1820813a1f2bSShri Abhyankar       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1821813a1f2bSShri Abhyankar       lnk[fm]    = i;
1822813a1f2bSShri Abhyankar       lnk_lvl[i] = 0;
18239371c9d4SSatish Balay       nzi++;
18249371c9d4SSatish Balay       dcount++;
1825813a1f2bSShri Abhyankar     }
1826813a1f2bSShri Abhyankar 
1827813a1f2bSShri Abhyankar     /* add pivot rows into the active row */
1828813a1f2bSShri Abhyankar     nzbd = 0;
1829813a1f2bSShri Abhyankar     prow = lnk[n];
1830813a1f2bSShri Abhyankar     while (prow < i) {
1831813a1f2bSShri Abhyankar       nnz      = bdiag[prow];
1832813a1f2bSShri Abhyankar       cols     = bj_ptr[prow] + nnz + 1;
1833813a1f2bSShri Abhyankar       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1834813a1f2bSShri Abhyankar       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
18359566063dSJacob Faibussowitsch       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1836813a1f2bSShri Abhyankar       nzi += nlnk;
1837813a1f2bSShri Abhyankar       prow = lnk[prow];
1838813a1f2bSShri Abhyankar       nzbd++;
1839813a1f2bSShri Abhyankar     }
1840813a1f2bSShri Abhyankar     bdiag[i]  = nzbd;
1841813a1f2bSShri Abhyankar     bi[i + 1] = bi[i] + nzi;
1842813a1f2bSShri Abhyankar     /* if free space is not available, make more free space */
1843813a1f2bSShri Abhyankar     if (current_space->local_remaining < nzi) {
1844f91af8c7SBarry Smith       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
18459566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
18469566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1847813a1f2bSShri Abhyankar       reallocs++;
1848813a1f2bSShri Abhyankar     }
1849813a1f2bSShri Abhyankar 
1850813a1f2bSShri Abhyankar     /* copy data into free_space and free_space_lvl, then initialize lnk */
18519566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1852813a1f2bSShri Abhyankar     bj_ptr[i]    = current_space->array;
1853813a1f2bSShri Abhyankar     bjlvl_ptr[i] = current_space_lvl->array;
1854813a1f2bSShri Abhyankar 
1855813a1f2bSShri Abhyankar     /* make sure the active row i has diagonal entry */
185600045ab3SPierre Jolivet     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1857813a1f2bSShri Abhyankar 
1858813a1f2bSShri Abhyankar     current_space->array += nzi;
1859813a1f2bSShri Abhyankar     current_space->local_used += nzi;
1860813a1f2bSShri Abhyankar     current_space->local_remaining -= nzi;
1861813a1f2bSShri Abhyankar     current_space_lvl->array += nzi;
1862813a1f2bSShri Abhyankar     current_space_lvl->local_used += nzi;
1863813a1f2bSShri Abhyankar     current_space_lvl->local_remaining -= nzi;
1864813a1f2bSShri Abhyankar   }
1865813a1f2bSShri Abhyankar 
18669566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
18679566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
1868813a1f2bSShri Abhyankar   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1869*84648c2dSPierre Jolivet   PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj));
18709566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1871813a1f2bSShri Abhyankar 
18729566063dSJacob Faibussowitsch   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
18739566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
18749566063dSJacob Faibussowitsch   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1875813a1f2bSShri Abhyankar 
1876813a1f2bSShri Abhyankar #if defined(PETSC_USE_INFO)
1877813a1f2bSShri Abhyankar   {
1878aef85c9fSShri Abhyankar     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
18799566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
18809566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
18819566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
18829566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "for best performance.\n"));
188348a46eb9SPierre Jolivet     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1884813a1f2bSShri Abhyankar   }
1885813a1f2bSShri Abhyankar #endif
1886813a1f2bSShri Abhyankar   /* put together the new matrix */
18879566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
188857508eceSPierre Jolivet   b          = (Mat_SeqAIJ *)fact->data;
1889813a1f2bSShri Abhyankar   b->free_ij = PETSC_TRUE;
18909f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
18919f0612e4SBarry Smith   b->free_a = PETSC_TRUE;
1892813a1f2bSShri Abhyankar   b->j      = bj;
1893813a1f2bSShri Abhyankar   b->i      = bi;
1894813a1f2bSShri Abhyankar   b->diag   = bdiag;
1895f4259b30SLisandro Dalcin   b->ilen   = NULL;
1896f4259b30SLisandro Dalcin   b->imax   = NULL;
1897813a1f2bSShri Abhyankar   b->row    = isrow;
1898813a1f2bSShri Abhyankar   b->col    = iscol;
18999566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
19009566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
1901813a1f2bSShri Abhyankar   b->icol = isicol;
19022205254eSKarl Rupp 
1903*84648c2dSPierre Jolivet   PetscCall(PetscMalloc1(n, &b->solve_work));
1904813a1f2bSShri Abhyankar   /* In b structure:  Free imax, ilen, old a, old j.
1905813a1f2bSShri Abhyankar      Allocate bdiag, solve_work, new a, new j */
1906f268cba8SShri Abhyankar   b->maxnz = b->nz = bdiag[0] + 1;
19072205254eSKarl Rupp 
190857508eceSPierre Jolivet   fact->info.factor_mallocs    = reallocs;
190957508eceSPierre Jolivet   fact->info.fill_ratio_given  = f;
191057508eceSPierre Jolivet   fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
191157508eceSPierre Jolivet   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
19124d12350bSJunchao Zhang   if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
19139566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
19143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1915813a1f2bSShri Abhyankar }
1916813a1f2bSShri Abhyankar 
1917ff6a9541SJacob Faibussowitsch #if 0
1918ff6a9541SJacob Faibussowitsch // unused
1919ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1920d71ae5a4SJacob Faibussowitsch {
19216516239fSHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
19226516239fSHong Zhang   IS                 isicol;
19236516239fSHong Zhang   const PetscInt    *r, *ic;
1924ece542f9SHong Zhang   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
19256516239fSHong Zhang   PetscInt          *bi, *cols, nnz, *cols_lvl;
19266516239fSHong Zhang   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
19276516239fSHong Zhang   PetscInt           i, levels, diagonal_fill;
1928ace3abfcSBarry Smith   PetscBool          col_identity, row_identity;
19296516239fSHong Zhang   PetscReal          f;
19300298fd71SBarry Smith   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
19316516239fSHong Zhang   PetscBT            lnkbt;
19326516239fSHong Zhang   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
19330298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
19340298fd71SBarry Smith   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1935ace3abfcSBarry Smith   PetscBool          missing;
19366516239fSHong Zhang 
19376516239fSHong Zhang   PetscFunctionBegin;
193808401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
19399566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
194028b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1941ece542f9SHong Zhang 
19426516239fSHong Zhang   f             = info->fill;
19436516239fSHong Zhang   levels        = (PetscInt)info->levels;
19446516239fSHong Zhang   diagonal_fill = (PetscInt)info->diagonal_fill;
19452205254eSKarl Rupp 
19469566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
19476516239fSHong Zhang 
19489566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
19499566063dSJacob Faibussowitsch   PetscCall(ISIdentity(iscol, &col_identity));
19506516239fSHong Zhang   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
19519566063dSJacob Faibussowitsch     PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE));
19522205254eSKarl Rupp 
1953ad04f41aSHong Zhang     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
19544d12350bSJunchao Zhang     if (a->inode.size_csr) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1955d5f3da31SBarry Smith     fact->factortype               = MAT_FACTOR_ILU;
1956719d5645SBarry Smith     (fact)->info.factor_mallocs    = 0;
1957719d5645SBarry Smith     (fact)->info.fill_ratio_given  = info->fill;
1958719d5645SBarry Smith     (fact)->info.fill_ratio_needed = 1.0;
19592205254eSKarl Rupp 
196057508eceSPierre Jolivet     b       = (Mat_SeqAIJ *)fact->data;
196108480c60SBarry Smith     b->row  = isrow;
196208480c60SBarry Smith     b->col  = iscol;
196382bf6240SBarry Smith     b->icol = isicol;
19649566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work));
19659566063dSJacob Faibussowitsch     PetscCall(PetscObjectReference((PetscObject)isrow));
19669566063dSJacob Faibussowitsch     PetscCall(PetscObjectReference((PetscObject)iscol));
19673ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
196808480c60SBarry Smith   }
196908480c60SBarry Smith 
19709566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
19719566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
19729e25ed09SBarry Smith 
19731bfa06eaSJed Brown   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
19749f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(n + 1,sizeof(PetscInt),(void **)&bi));
19759566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));
1976a6df321fSShri Abhyankar   bi[0] = bdiag[0] = 0;
19776cf997b0SBarry Smith 
19789566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
19795a8e39fbSHong Zhang 
19805a8e39fbSHong Zhang   /* create a linked list for storing column indices of the active row */
19815a8e39fbSHong Zhang   nlnk = n + 1;
19829566063dSJacob Faibussowitsch   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
19835a8e39fbSHong Zhang 
19845a8e39fbSHong Zhang   /* initial FreeSpace size is f*(ai[n]+1) */
19859566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
19865a8e39fbSHong Zhang   current_space = free_space;
19879566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
19885a8e39fbSHong Zhang   current_space_lvl = free_space_lvl;
19895a8e39fbSHong Zhang 
19905a8e39fbSHong Zhang   for (i = 0; i < n; i++) {
19915a8e39fbSHong Zhang     nzi = 0;
19926cf997b0SBarry Smith     /* copy current row into linked list */
19935a8e39fbSHong Zhang     nnz = ai[r[i] + 1] - ai[r[i]];
199428b400f6SJacob Faibussowitsch     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
19955a8e39fbSHong Zhang     cols   = aj + ai[r[i]];
19965a8e39fbSHong Zhang     lnk[i] = -1; /* marker to indicate if diagonal exists */
19979566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
19985a8e39fbSHong Zhang     nzi += nlnk;
19996cf997b0SBarry Smith 
20006cf997b0SBarry Smith     /* make sure diagonal entry is included */
20015a8e39fbSHong Zhang     if (diagonal_fill && lnk[i] == -1) {
20026cf997b0SBarry Smith       fm = n;
20035a8e39fbSHong Zhang       while (lnk[fm] < i) fm = lnk[fm];
20045a8e39fbSHong Zhang       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
20055a8e39fbSHong Zhang       lnk[fm]    = i;
20065a8e39fbSHong Zhang       lnk_lvl[i] = 0;
20079371c9d4SSatish Balay       nzi++;
20089371c9d4SSatish Balay       dcount++;
20096cf997b0SBarry Smith     }
20106cf997b0SBarry Smith 
20115a8e39fbSHong Zhang     /* add pivot rows into the active row */
20125a8e39fbSHong Zhang     nzbd = 0;
20135a8e39fbSHong Zhang     prow = lnk[n];
20145a8e39fbSHong Zhang     while (prow < i) {
20155a8e39fbSHong Zhang       nnz      = bdiag[prow];
20165a8e39fbSHong Zhang       cols     = bj_ptr[prow] + nnz + 1;
20175a8e39fbSHong Zhang       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
20185a8e39fbSHong Zhang       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
20199566063dSJacob Faibussowitsch       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
20205a8e39fbSHong Zhang       nzi += nlnk;
20215a8e39fbSHong Zhang       prow = lnk[prow];
20225a8e39fbSHong Zhang       nzbd++;
202356beaf04SBarry Smith     }
20245a8e39fbSHong Zhang     bdiag[i]  = nzbd;
20255a8e39fbSHong Zhang     bi[i + 1] = bi[i] + nzi;
2026ecf371e4SBarry Smith 
20275a8e39fbSHong Zhang     /* if free space is not available, make more free space */
20285a8e39fbSHong Zhang     if (current_space->local_remaining < nzi) {
2029f91af8c7SBarry Smith       nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
20309566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
20319566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
20325a8e39fbSHong Zhang       reallocs++;
20335a8e39fbSHong Zhang     }
2034ecf371e4SBarry Smith 
20355a8e39fbSHong Zhang     /* copy data into free_space and free_space_lvl, then initialize lnk */
20369566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
20375a8e39fbSHong Zhang     bj_ptr[i]    = current_space->array;
20385a8e39fbSHong Zhang     bjlvl_ptr[i] = current_space_lvl->array;
20395a8e39fbSHong Zhang 
20405a8e39fbSHong Zhang     /* make sure the active row i has diagonal entry */
204100045ab3SPierre Jolivet     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
20425a8e39fbSHong Zhang 
20435a8e39fbSHong Zhang     current_space->array += nzi;
20445a8e39fbSHong Zhang     current_space->local_used += nzi;
20455a8e39fbSHong Zhang     current_space->local_remaining -= nzi;
20465a8e39fbSHong Zhang     current_space_lvl->array += nzi;
20475a8e39fbSHong Zhang     current_space_lvl->local_used += nzi;
20485a8e39fbSHong Zhang     current_space_lvl->local_remaining -= nzi;
20499e25ed09SBarry Smith   }
20505a8e39fbSHong Zhang 
20519566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
20529566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
20535a8e39fbSHong Zhang 
20545a8e39fbSHong Zhang   /* destroy list of free space and other temporary arrays */
20559f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscInt),(void **)&bj));
20569566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */
20579566063dSJacob Faibussowitsch   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
20589566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
20599566063dSJacob Faibussowitsch   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
20609e25ed09SBarry Smith 
20616cf91177SBarry Smith   #if defined(PETSC_USE_INFO)
2062f64065bbSBarry Smith   {
20635a8e39fbSHong Zhang     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
20649566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
20659566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
20669566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
20679566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "for best performance.\n"));
206848a46eb9SPierre Jolivet     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
2069f64065bbSBarry Smith   }
207063ba0a88SBarry Smith   #endif
2071bbb6d6a8SBarry Smith 
20729e25ed09SBarry Smith   /* put together the new matrix */
20739566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
207457508eceSPierre Jolivet   b = (Mat_SeqAIJ *)fact->data;
2075e6b907acSBarry Smith   b->free_ij      = PETSC_TRUE;
20769f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscScalar),(void **)&b->a));
20779f0612e4SBarry Smith   b->free_a       = PETSC_TRUE;
20785a8e39fbSHong Zhang   b->j = bj;
20795a8e39fbSHong Zhang   b->i = bi;
20805a8e39fbSHong Zhang   for (i = 0; i < n; i++) bdiag[i] += bi[i];
20815a8e39fbSHong Zhang   b->diag = bdiag;
2082f4259b30SLisandro Dalcin   b->ilen = NULL;
2083f4259b30SLisandro Dalcin   b->imax = NULL;
2084416022c9SBarry Smith   b->row  = isrow;
2085416022c9SBarry Smith   b->col  = iscol;
20869566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
20879566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
208882bf6240SBarry Smith   b->icol = isicol;
20899566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
2090416022c9SBarry Smith   /* In b structure:  Free imax, ilen, old a, old j.
20915a8e39fbSHong Zhang      Allocate bdiag, solve_work, new a, new j */
20925a8e39fbSHong Zhang   b->maxnz = b->nz = bi[n];
20932205254eSKarl Rupp 
209457508eceSPierre Jolivet   fact->info.factor_mallocs    = reallocs;
209557508eceSPierre Jolivet   fact->info.fill_ratio_given  = f;
209657508eceSPierre Jolivet   fact->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
209757508eceSPierre Jolivet   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ_inplace;
20984d12350bSJunchao Zhang   if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
20993ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
21009e25ed09SBarry Smith }
2101ff6a9541SJacob Faibussowitsch #endif
2102d5d45c9bSBarry Smith 
2103d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
2104d71ae5a4SJacob Faibussowitsch {
21055f44c854SHong Zhang   Mat              C  = B;
21065f44c854SHong Zhang   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
21075f44c854SHong Zhang   Mat_SeqSBAIJ    *b  = (Mat_SeqSBAIJ *)C->data;
21085f44c854SHong Zhang   IS               ip = b->row, iip = b->icol;
21095f44c854SHong Zhang   const PetscInt  *rip, *riip;
21105f44c854SHong Zhang   PetscInt         i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
21115f44c854SHong Zhang   PetscInt        *ai = a->i, *aj = a->j;
21125f44c854SHong Zhang   PetscInt         k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
2113b65878eeSJunchao Zhang   MatScalar       *rtmp, *ba = b->a, *bval, dk, uikdi;
2114ace3abfcSBarry Smith   PetscBool        perm_identity;
2115d90ac83dSHong Zhang   FactorShiftCtx   sctx;
211698a93161SHong Zhang   PetscReal        rs;
2117b65878eeSJunchao Zhang   const MatScalar *aa, *v;
2118b65878eeSJunchao Zhang   MatScalar        d;
211998a93161SHong Zhang 
21205f44c854SHong Zhang   PetscFunctionBegin;
2121b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
212298a93161SHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
21239566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
212498a93161SHong Zhang 
2125f4db908eSBarry Smith   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
212698a93161SHong Zhang     sctx.shift_top = info->zeropivot;
212798a93161SHong Zhang     for (i = 0; i < mbs; i++) {
212898a93161SHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
212998a93161SHong Zhang       d  = (aa)[a->diag[i]];
213098a93161SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
213198a93161SHong Zhang       v  = aa + ai[i];
213298a93161SHong Zhang       nz = ai[i + 1] - ai[i];
21332205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
213498a93161SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
213598a93161SHong Zhang     }
213698a93161SHong Zhang     sctx.shift_top *= 1.1;
213798a93161SHong Zhang     sctx.nshift_max = 5;
213898a93161SHong Zhang     sctx.shift_lo   = 0.;
213998a93161SHong Zhang     sctx.shift_hi   = 1.;
214098a93161SHong Zhang   }
21415f44c854SHong Zhang 
21429566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(ip, &rip));
21439566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iip, &riip));
21445f44c854SHong Zhang 
21455f44c854SHong Zhang   /* allocate working arrays
21465f44c854SHong Zhang      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
21475f44c854SHong Zhang      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
21485f44c854SHong Zhang   */
21499566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
21505f44c854SHong Zhang 
215198a93161SHong Zhang   do {
215207b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
215398a93161SHong Zhang 
2154c5546cabSHong Zhang     for (i = 0; i < mbs; i++) c2r[i] = mbs;
21552e987568SSatish Balay     if (mbs) il[0] = 0;
21565f44c854SHong Zhang 
21575f44c854SHong Zhang     for (k = 0; k < mbs; k++) {
21585f44c854SHong Zhang       /* zero rtmp */
21595f44c854SHong Zhang       nz    = bi[k + 1] - bi[k];
21605f44c854SHong Zhang       bjtmp = bj + bi[k];
21615f44c854SHong Zhang       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
21625f44c854SHong Zhang 
21635f44c854SHong Zhang       /* load in initial unfactored row */
21645f44c854SHong Zhang       bval = ba + bi[k];
21659371c9d4SSatish Balay       jmin = ai[rip[k]];
21669371c9d4SSatish Balay       jmax = ai[rip[k] + 1];
21675f44c854SHong Zhang       for (j = jmin; j < jmax; j++) {
21685f44c854SHong Zhang         col = riip[aj[j]];
21695f44c854SHong Zhang         if (col >= k) { /* only take upper triangular entry */
21705f44c854SHong Zhang           rtmp[col] = aa[j];
21715f44c854SHong Zhang           *bval++   = 0.0; /* for in-place factorization */
21725f44c854SHong Zhang         }
21735f44c854SHong Zhang       }
217498a93161SHong Zhang       /* shift the diagonal of the matrix: ZeropivotApply() */
217598a93161SHong Zhang       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
21765f44c854SHong Zhang 
21775f44c854SHong Zhang       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
21785f44c854SHong Zhang       dk = rtmp[k];
21795f44c854SHong Zhang       i  = c2r[k]; /* first row to be added to k_th row  */
21805f44c854SHong Zhang 
21815f44c854SHong Zhang       while (i < k) {
21825f44c854SHong Zhang         nexti = c2r[i]; /* next row to be added to k_th row */
21835f44c854SHong Zhang 
21845f44c854SHong Zhang         /* compute multiplier, update diag(k) and U(i,k) */
21855f44c854SHong Zhang         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
21865f44c854SHong Zhang         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
21875f44c854SHong Zhang         dk += uikdi * ba[ili];           /* update diag[k] */
21885f44c854SHong Zhang         ba[ili] = uikdi;                 /* -U(i,k) */
21895f44c854SHong Zhang 
21905f44c854SHong Zhang         /* add multiple of row i to k-th row */
21919371c9d4SSatish Balay         jmin = ili + 1;
21929371c9d4SSatish Balay         jmax = bi[i + 1];
21935f44c854SHong Zhang         if (jmin < jmax) {
21945f44c854SHong Zhang           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
21955f44c854SHong Zhang           /* update il and c2r for row i */
21965f44c854SHong Zhang           il[i]  = jmin;
21979371c9d4SSatish Balay           j      = bj[jmin];
21989371c9d4SSatish Balay           c2r[i] = c2r[j];
21999371c9d4SSatish Balay           c2r[j] = i;
22005f44c854SHong Zhang         }
22015f44c854SHong Zhang         i = nexti;
22025f44c854SHong Zhang       }
22035f44c854SHong Zhang 
22045f44c854SHong Zhang       /* copy data into U(k,:) */
220598a93161SHong Zhang       rs   = 0.0;
22069371c9d4SSatish Balay       jmin = bi[k];
22079371c9d4SSatish Balay       jmax = bi[k + 1] - 1;
22085f44c854SHong Zhang       if (jmin < jmax) {
22095f44c854SHong Zhang         for (j = jmin; j < jmax; j++) {
22109371c9d4SSatish Balay           col   = bj[j];
22119371c9d4SSatish Balay           ba[j] = rtmp[col];
22129371c9d4SSatish Balay           rs += PetscAbsScalar(ba[j]);
22135f44c854SHong Zhang         }
22145f44c854SHong Zhang         /* add the k-th row into il and c2r */
22155f44c854SHong Zhang         il[k]  = jmin;
22169371c9d4SSatish Balay         i      = bj[jmin];
22179371c9d4SSatish Balay         c2r[k] = c2r[i];
22189371c9d4SSatish Balay         c2r[i] = k;
22195f44c854SHong Zhang       }
222098a93161SHong Zhang 
222198a93161SHong Zhang       /* MatPivotCheck() */
222298a93161SHong Zhang       sctx.rs = rs;
222398a93161SHong Zhang       sctx.pv = dk;
22249566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
222507b50cabSHong Zhang       if (sctx.newshift) break;
222698a93161SHong Zhang       dk = sctx.pv;
222798a93161SHong Zhang 
222898a93161SHong Zhang       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
222998a93161SHong Zhang     }
223007b50cabSHong Zhang   } while (sctx.newshift);
22315f44c854SHong Zhang 
2232b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
22339566063dSJacob Faibussowitsch   PetscCall(PetscFree3(rtmp, il, c2r));
22349566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(ip, &rip));
22359566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iip, &riip));
22365f44c854SHong Zhang 
22379566063dSJacob Faibussowitsch   PetscCall(ISIdentity(ip, &perm_identity));
22385f44c854SHong Zhang   if (perm_identity) {
223935233d99SShri Abhyankar     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
224035233d99SShri Abhyankar     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
22412169352eSHong Zhang     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
22422169352eSHong Zhang     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
22435f44c854SHong Zhang   } else {
224435233d99SShri Abhyankar     B->ops->solve          = MatSolve_SeqSBAIJ_1;
224535233d99SShri Abhyankar     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
224635233d99SShri Abhyankar     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
224735233d99SShri Abhyankar     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
22485f44c854SHong Zhang   }
22495f44c854SHong Zhang 
22505f44c854SHong Zhang   C->assembled    = PETSC_TRUE;
22515f44c854SHong Zhang   C->preallocated = PETSC_TRUE;
22522205254eSKarl Rupp 
22539566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->rmap->n));
225498a93161SHong Zhang 
225598a93161SHong Zhang   /* MatPivotView() */
225698a93161SHong Zhang   if (sctx.nshift) {
2257f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
22589566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
2259f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
22609566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2261f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
22629566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
226398a93161SHong Zhang     }
226498a93161SHong Zhang   }
22653ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
22665f44c854SHong Zhang }
22675f44c854SHong Zhang 
2268d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
2269d71ae5a4SJacob Faibussowitsch {
2270719d5645SBarry Smith   Mat              C  = B;
22710968510aSHong Zhang   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
22722ed133dbSHong Zhang   Mat_SeqSBAIJ    *b  = (Mat_SeqSBAIJ *)C->data;
22739bfd6278SHong Zhang   IS               ip = b->row, iip = b->icol;
22745d0c19d7SBarry Smith   const PetscInt  *rip, *riip;
2275c5546cabSHong Zhang   PetscInt         i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
22762ed133dbSHong Zhang   PetscInt        *ai = a->i, *aj = a->j;
22772ed133dbSHong Zhang   PetscInt         k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2278b65878eeSJunchao Zhang   MatScalar       *rtmp, *ba = b->a, *bval, dk, uikdi;
2279b65878eeSJunchao Zhang   const MatScalar *aa, *v;
2280ace3abfcSBarry Smith   PetscBool        perm_identity;
22810e95ead3SHong Zhang   FactorShiftCtx   sctx;
22820e95ead3SHong Zhang   PetscReal        rs;
2283b65878eeSJunchao Zhang   MatScalar        d;
2284d5d45c9bSBarry Smith 
2285a6175056SHong Zhang   PetscFunctionBegin;
2286b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
22870e95ead3SHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
22889566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
22890e95ead3SHong Zhang 
22900e95ead3SHong Zhang   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
22910e95ead3SHong Zhang     sctx.shift_top = info->zeropivot;
22920e95ead3SHong Zhang     for (i = 0; i < mbs; i++) {
22930e95ead3SHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
22940e95ead3SHong Zhang       d  = (aa)[a->diag[i]];
22950e95ead3SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
22960e95ead3SHong Zhang       v  = aa + ai[i];
22970e95ead3SHong Zhang       nz = ai[i + 1] - ai[i];
22982205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
22990e95ead3SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
23000e95ead3SHong Zhang     }
23010e95ead3SHong Zhang     sctx.shift_top *= 1.1;
23020e95ead3SHong Zhang     sctx.nshift_max = 5;
23030e95ead3SHong Zhang     sctx.shift_lo   = 0.;
23040e95ead3SHong Zhang     sctx.shift_hi   = 1.;
23050e95ead3SHong Zhang   }
2306ee45ca4aSHong Zhang 
23079566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(ip, &rip));
23089566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iip, &riip));
23092ed133dbSHong Zhang 
23102ed133dbSHong Zhang   /* initialization */
23119566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
23120e95ead3SHong Zhang 
23132ed133dbSHong Zhang   do {
231407b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
23150e95ead3SHong Zhang 
2316c5546cabSHong Zhang     for (i = 0; i < mbs; i++) jl[i] = mbs;
2317c5546cabSHong Zhang     il[0] = 0;
23182ed133dbSHong Zhang 
23192ed133dbSHong Zhang     for (k = 0; k < mbs; k++) {
2320c5546cabSHong Zhang       /* zero rtmp */
2321c5546cabSHong Zhang       nz    = bi[k + 1] - bi[k];
2322c5546cabSHong Zhang       bjtmp = bj + bi[k];
2323c5546cabSHong Zhang       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2324c5546cabSHong Zhang 
2325c05c3958SHong Zhang       bval = ba + bi[k];
23262ed133dbSHong Zhang       /* initialize k-th row by the perm[k]-th row of A */
23279371c9d4SSatish Balay       jmin = ai[rip[k]];
23289371c9d4SSatish Balay       jmax = ai[rip[k] + 1];
23292ed133dbSHong Zhang       for (j = jmin; j < jmax; j++) {
23309bfd6278SHong Zhang         col = riip[aj[j]];
23312ed133dbSHong Zhang         if (col >= k) { /* only take upper triangular entry */
23322ed133dbSHong Zhang           rtmp[col] = aa[j];
2333c05c3958SHong Zhang           *bval++   = 0.0; /* for in-place factorization */
23342ed133dbSHong Zhang         }
23352ed133dbSHong Zhang       }
233639e3d630SHong Zhang       /* shift the diagonal of the matrix */
2337540703acSHong Zhang       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
23382ed133dbSHong Zhang 
23392ed133dbSHong Zhang       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
23402ed133dbSHong Zhang       dk = rtmp[k];
23412ed133dbSHong Zhang       i  = jl[k]; /* first row to be added to k_th row  */
23422ed133dbSHong Zhang 
23432ed133dbSHong Zhang       while (i < k) {
23442ed133dbSHong Zhang         nexti = jl[i]; /* next row to be added to k_th row */
23452ed133dbSHong Zhang 
23462ed133dbSHong Zhang         /* compute multiplier, update diag(k) and U(i,k) */
23472ed133dbSHong Zhang         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
23482ed133dbSHong Zhang         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
23492ed133dbSHong Zhang         dk += uikdi * ba[ili];
23502ed133dbSHong Zhang         ba[ili] = uikdi; /* -U(i,k) */
23512ed133dbSHong Zhang 
23522ed133dbSHong Zhang         /* add multiple of row i to k-th row */
23539371c9d4SSatish Balay         jmin = ili + 1;
23549371c9d4SSatish Balay         jmax = bi[i + 1];
23552ed133dbSHong Zhang         if (jmin < jmax) {
23562ed133dbSHong Zhang           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
23572ed133dbSHong Zhang           /* update il and jl for row i */
23582ed133dbSHong Zhang           il[i] = jmin;
23599371c9d4SSatish Balay           j     = bj[jmin];
23609371c9d4SSatish Balay           jl[i] = jl[j];
23619371c9d4SSatish Balay           jl[j] = i;
23622ed133dbSHong Zhang         }
23632ed133dbSHong Zhang         i = nexti;
23642ed133dbSHong Zhang       }
23652ed133dbSHong Zhang 
23660697b6caSHong Zhang       /* shift the diagonals when zero pivot is detected */
23670697b6caSHong Zhang       /* compute rs=sum of abs(off-diagonal) */
23680697b6caSHong Zhang       rs   = 0.0;
23693c9e8598SHong Zhang       jmin = bi[k] + 1;
23703c9e8598SHong Zhang       nz   = bi[k + 1] - jmin;
23713c9e8598SHong Zhang       bcol = bj + jmin;
2372ad540459SPierre Jolivet       for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);
2373540703acSHong Zhang 
2374540703acSHong Zhang       sctx.rs = rs;
2375540703acSHong Zhang       sctx.pv = dk;
23769566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
237707b50cabSHong Zhang       if (sctx.newshift) break;
23780e95ead3SHong Zhang       dk = sctx.pv;
23793c9e8598SHong Zhang 
23803c9e8598SHong Zhang       /* copy data into U(k,:) */
238139e3d630SHong Zhang       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
23829371c9d4SSatish Balay       jmin      = bi[k] + 1;
23839371c9d4SSatish Balay       jmax      = bi[k + 1];
238439e3d630SHong Zhang       if (jmin < jmax) {
238539e3d630SHong Zhang         for (j = jmin; j < jmax; j++) {
23869371c9d4SSatish Balay           col   = bj[j];
23879371c9d4SSatish Balay           ba[j] = rtmp[col];
23883c9e8598SHong Zhang         }
238939e3d630SHong Zhang         /* add the k-th row into il and jl */
23903c9e8598SHong Zhang         il[k] = jmin;
23919371c9d4SSatish Balay         i     = bj[jmin];
23929371c9d4SSatish Balay         jl[k] = jl[i];
23939371c9d4SSatish Balay         jl[i] = k;
23943c9e8598SHong Zhang       }
23953c9e8598SHong Zhang     }
239607b50cabSHong Zhang   } while (sctx.newshift);
23970e95ead3SHong Zhang 
23989566063dSJacob Faibussowitsch   PetscCall(PetscFree3(rtmp, il, jl));
23999566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(ip, &rip));
24009566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iip, &riip));
2401b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2402db4efbfdSBarry Smith 
24039566063dSJacob Faibussowitsch   PetscCall(ISIdentity(ip, &perm_identity));
2404db4efbfdSBarry Smith   if (perm_identity) {
24050a3c351aSHong Zhang     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
24060a3c351aSHong Zhang     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
24070a3c351aSHong Zhang     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
24080a3c351aSHong Zhang     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2409db4efbfdSBarry Smith   } else {
24100a3c351aSHong Zhang     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
24110a3c351aSHong Zhang     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
24120a3c351aSHong Zhang     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
24130a3c351aSHong Zhang     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2414db4efbfdSBarry Smith   }
2415db4efbfdSBarry Smith 
24163c9e8598SHong Zhang   C->assembled    = PETSC_TRUE;
24173c9e8598SHong Zhang   C->preallocated = PETSC_TRUE;
24182205254eSKarl Rupp 
24199566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->rmap->n));
2420540703acSHong Zhang   if (sctx.nshift) {
2421f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
24229566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2423f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
24249566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
24250697b6caSHong Zhang     }
24263c9e8598SHong Zhang   }
24273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
24283c9e8598SHong Zhang }
2429a6175056SHong Zhang 
24308a76ed4fSHong Zhang /*
24318a76ed4fSHong Zhang    icc() under revised new data structure.
24328a76ed4fSHong Zhang    Factored arrays bj and ba are stored as
24338a76ed4fSHong Zhang      U(0,:),...,U(i,:),U(n-1,:)
24348a76ed4fSHong Zhang 
24358a76ed4fSHong Zhang    ui=fact->i is an array of size n+1, in which
24368a76ed4fSHong Zhang    ui+
24378a76ed4fSHong Zhang      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
24388a76ed4fSHong Zhang      ui[n]:  points to U(n-1,n-1)+1
24398a76ed4fSHong Zhang 
24408a76ed4fSHong Zhang   udiag=fact->diag is an array of size n,in which
24418a76ed4fSHong Zhang      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
24428a76ed4fSHong Zhang 
24438a76ed4fSHong Zhang    U(i,:) contains udiag[i] as its last entry, i.e.,
24448a76ed4fSHong Zhang     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
24458a76ed4fSHong Zhang */
24468a76ed4fSHong Zhang 
2447d71ae5a4SJacob Faibussowitsch PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2448d71ae5a4SJacob Faibussowitsch {
24490968510aSHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2450ed59401aSHong Zhang   Mat_SeqSBAIJ      *b;
2451ace3abfcSBarry Smith   PetscBool          perm_identity, missing;
24525f44c854SHong Zhang   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
24535d0c19d7SBarry Smith   const PetscInt    *rip, *riip;
2454ed59401aSHong Zhang   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
24550298fd71SBarry Smith   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
24565a8e39fbSHong Zhang   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2457ed59401aSHong Zhang   PetscReal          fill = info->fill, levels = info->levels;
24580298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
24590298fd71SBarry Smith   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
24607a48dd6fSHong Zhang   PetscBT            lnkbt;
2461b635d36bSHong Zhang   IS                 iperm;
2462a6175056SHong Zhang 
2463a6175056SHong Zhang   PetscFunctionBegin;
246408401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
24659566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &d));
246628b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
24679566063dSJacob Faibussowitsch   PetscCall(ISIdentity(perm, &perm_identity));
24689566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
24697a48dd6fSHong Zhang 
24709f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
24719566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &udiag));
247239e3d630SHong Zhang   ui[0] = 0;
247339e3d630SHong Zhang 
24748a76ed4fSHong Zhang   /* ICC(0) without matrix ordering: simply rearrange column indices */
2475622e2028SHong Zhang   if (!levels && perm_identity) {
24765f44c854SHong Zhang     for (i = 0; i < am; i++) {
2477c5546cabSHong Zhang       ncols     = ai[i + 1] - a->diag[i];
2478c5546cabSHong Zhang       ui[i + 1] = ui[i] + ncols;
2479c5546cabSHong Zhang       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2480dc1e2a3fSHong Zhang     }
24819566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2482dc1e2a3fSHong Zhang     cols = uj;
2483dc1e2a3fSHong Zhang     for (i = 0; i < am; i++) {
24845f44c854SHong Zhang       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2485dc1e2a3fSHong Zhang       ncols = ai[i + 1] - a->diag[i] - 1;
24865f44c854SHong Zhang       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2487910cf402Sprj-       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
24885f44c854SHong Zhang     }
24895f44c854SHong Zhang   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
24909566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(iperm, &riip));
24919566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(perm, &rip));
24925f44c854SHong Zhang 
24935f44c854SHong Zhang     /* initialization */
24949566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(am + 1, &ajtmp));
24955f44c854SHong Zhang 
24965f44c854SHong Zhang     /* jl: linked list for storing indices of the pivot rows
24975f44c854SHong Zhang        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
24989566063dSJacob Faibussowitsch     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
24995f44c854SHong Zhang     for (i = 0; i < am; i++) {
25009371c9d4SSatish Balay       jl[i] = am;
25019371c9d4SSatish Balay       il[i] = 0;
25025f44c854SHong Zhang     }
25035f44c854SHong Zhang 
25045f44c854SHong Zhang     /* create and initialize a linked list for storing column indices of the active row k */
25055f44c854SHong Zhang     nlnk = am + 1;
25069566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
25075f44c854SHong Zhang 
250895b5ac22SHong Zhang     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
25099566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
25105f44c854SHong Zhang     current_space = free_space;
25119566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
25125f44c854SHong Zhang     current_space_lvl = free_space_lvl;
25135f44c854SHong Zhang 
25145f44c854SHong Zhang     for (k = 0; k < am; k++) { /* for each active row k */
25155f44c854SHong Zhang       /* initialize lnk by the column indices of row rip[k] of A */
25165f44c854SHong Zhang       nzk   = 0;
25175f44c854SHong Zhang       ncols = ai[rip[k] + 1] - ai[rip[k]];
251828b400f6SJacob Faibussowitsch       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
25195f44c854SHong Zhang       ncols_upper = 0;
25205f44c854SHong Zhang       for (j = 0; j < ncols; j++) {
25215f44c854SHong Zhang         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
25225f44c854SHong Zhang         if (riip[i] >= k) {         /* only take upper triangular entry */
25235f44c854SHong Zhang           ajtmp[ncols_upper] = i;
25245f44c854SHong Zhang           ncols_upper++;
25255f44c854SHong Zhang         }
25265f44c854SHong Zhang       }
25279566063dSJacob Faibussowitsch       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
25285f44c854SHong Zhang       nzk += nlnk;
25295f44c854SHong Zhang 
25305f44c854SHong Zhang       /* update lnk by computing fill-in for each pivot row to be merged in */
25315f44c854SHong Zhang       prow = jl[k]; /* 1st pivot row */
25325f44c854SHong Zhang 
25335f44c854SHong Zhang       while (prow < k) {
25345f44c854SHong Zhang         nextprow = jl[prow];
25355f44c854SHong Zhang 
25365f44c854SHong Zhang         /* merge prow into k-th row */
25375f44c854SHong Zhang         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
25385f44c854SHong Zhang         jmax  = ui[prow + 1];
25395f44c854SHong Zhang         ncols = jmax - jmin;
25405f44c854SHong Zhang         i     = jmin - ui[prow];
25415f44c854SHong Zhang         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
25425f44c854SHong Zhang         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
25435f44c854SHong Zhang         j     = *(uj - 1);
25449566063dSJacob Faibussowitsch         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
25455f44c854SHong Zhang         nzk += nlnk;
25465f44c854SHong Zhang 
25475f44c854SHong Zhang         /* update il and jl for prow */
25485f44c854SHong Zhang         if (jmin < jmax) {
25495f44c854SHong Zhang           il[prow] = jmin;
25509371c9d4SSatish Balay           j        = *cols;
25519371c9d4SSatish Balay           jl[prow] = jl[j];
25529371c9d4SSatish Balay           jl[j]    = prow;
25535f44c854SHong Zhang         }
25545f44c854SHong Zhang         prow = nextprow;
25555f44c854SHong Zhang       }
25565f44c854SHong Zhang 
25575f44c854SHong Zhang       /* if free space is not available, make more free space */
25585f44c854SHong Zhang       if (current_space->local_remaining < nzk) {
25595f44c854SHong Zhang         i = am - k + 1;                                    /* num of unfactored rows */
2560f91af8c7SBarry Smith         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
25619566063dSJacob Faibussowitsch         PetscCall(PetscFreeSpaceGet(i, &current_space));
25629566063dSJacob Faibussowitsch         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
25635f44c854SHong Zhang         reallocs++;
25645f44c854SHong Zhang       }
25655f44c854SHong Zhang 
25665f44c854SHong Zhang       /* copy data into free_space and free_space_lvl, then initialize lnk */
256708401ef6SPierre Jolivet       PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
25689566063dSJacob Faibussowitsch       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
25695f44c854SHong Zhang 
25705f44c854SHong Zhang       /* add the k-th row into il and jl */
25715f44c854SHong Zhang       if (nzk > 1) {
25725f44c854SHong Zhang         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
25739371c9d4SSatish Balay         jl[k] = jl[i];
25749371c9d4SSatish Balay         jl[i] = k;
25755f44c854SHong Zhang         il[k] = ui[k] + 1;
25765f44c854SHong Zhang       }
25775f44c854SHong Zhang       uj_ptr[k]     = current_space->array;
25785f44c854SHong Zhang       uj_lvl_ptr[k] = current_space_lvl->array;
25795f44c854SHong Zhang 
25805f44c854SHong Zhang       current_space->array += nzk;
25815f44c854SHong Zhang       current_space->local_used += nzk;
25825f44c854SHong Zhang       current_space->local_remaining -= nzk;
25835f44c854SHong Zhang 
25845f44c854SHong Zhang       current_space_lvl->array += nzk;
25855f44c854SHong Zhang       current_space_lvl->local_used += nzk;
25865f44c854SHong Zhang       current_space_lvl->local_remaining -= nzk;
25875f44c854SHong Zhang 
25885f44c854SHong Zhang       ui[k + 1] = ui[k] + nzk;
25895f44c854SHong Zhang     }
25905f44c854SHong Zhang 
25919566063dSJacob Faibussowitsch     PetscCall(ISRestoreIndices(perm, &rip));
25929566063dSJacob Faibussowitsch     PetscCall(ISRestoreIndices(iperm, &riip));
25939566063dSJacob Faibussowitsch     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
25949566063dSJacob Faibussowitsch     PetscCall(PetscFree(ajtmp));
25955f44c854SHong Zhang 
25969263d837SHong Zhang     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
25979f0612e4SBarry Smith     PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj));
25989566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor  */
25999566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
26009566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
26015f44c854SHong Zhang 
26025f44c854SHong Zhang   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
26035f44c854SHong Zhang 
26045f44c854SHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
260557508eceSPierre Jolivet   b          = (Mat_SeqSBAIJ *)fact->data;
26069f0612e4SBarry Smith   b->free_ij = PETSC_TRUE;
2607*84648c2dSPierre Jolivet   PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a));
26089f0612e4SBarry Smith   b->free_a    = PETSC_TRUE;
26095f44c854SHong Zhang   b->j         = uj;
26105f44c854SHong Zhang   b->i         = ui;
26115f44c854SHong Zhang   b->diag      = udiag;
26127f53bb6cSHong Zhang   b->free_diag = PETSC_TRUE;
2613f4259b30SLisandro Dalcin   b->ilen      = NULL;
2614f4259b30SLisandro Dalcin   b->imax      = NULL;
26155f44c854SHong Zhang   b->row       = perm;
26165f44c854SHong Zhang   b->col       = perm;
26179566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
26189566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
26195f44c854SHong Zhang   b->icol          = iperm;
26205f44c854SHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
26212205254eSKarl Rupp 
2622*84648c2dSPierre Jolivet   PetscCall(PetscMalloc1(am, &b->solve_work));
26232205254eSKarl Rupp 
26245f44c854SHong Zhang   b->maxnz = b->nz = ui[am];
26255f44c854SHong Zhang 
2626f284f12aSHong Zhang   fact->info.factor_mallocs   = reallocs;
2627f284f12aSHong Zhang   fact->info.fill_ratio_given = fill;
26285f44c854SHong Zhang   if (ai[am] != 0) {
26296a69fef8SHong Zhang     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
263095b5ac22SHong Zhang     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
26315f44c854SHong Zhang   } else {
2632f284f12aSHong Zhang     fact->info.fill_ratio_needed = 0.0;
26335f44c854SHong Zhang   }
26349263d837SHong Zhang #if defined(PETSC_USE_INFO)
26359263d837SHong Zhang   if (ai[am] != 0) {
26369263d837SHong Zhang     PetscReal af = fact->info.fill_ratio_needed;
26379566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
26389566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
26399566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
26409263d837SHong Zhang   } else {
26419566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
26429263d837SHong Zhang   }
26439263d837SHong Zhang #endif
264435233d99SShri Abhyankar   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
26453ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
26465f44c854SHong Zhang }
26475f44c854SHong Zhang 
2648ff6a9541SJacob Faibussowitsch #if 0
2649ff6a9541SJacob Faibussowitsch // unused
2650ff6a9541SJacob Faibussowitsch static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2651d71ae5a4SJacob Faibussowitsch {
26525f44c854SHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
26535f44c854SHong Zhang   Mat_SeqSBAIJ      *b;
2654ace3abfcSBarry Smith   PetscBool          perm_identity, missing;
26555f44c854SHong Zhang   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
26565f44c854SHong Zhang   const PetscInt    *rip, *riip;
26575f44c854SHong Zhang   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
26580298fd71SBarry Smith   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
26595f44c854SHong Zhang   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
26605f44c854SHong Zhang   PetscReal          fill = info->fill, levels = info->levels;
26610298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
26620298fd71SBarry Smith   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
26635f44c854SHong Zhang   PetscBT            lnkbt;
26645f44c854SHong Zhang   IS                 iperm;
26655f44c854SHong Zhang 
26665f44c854SHong Zhang   PetscFunctionBegin;
266708401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
26689566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &d));
266928b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
26709566063dSJacob Faibussowitsch   PetscCall(ISIdentity(perm, &perm_identity));
26719566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
26725f44c854SHong Zhang 
26739f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(am + 1,sizeof(PetscInt),(void **)&ui));
26749566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &udiag));
26755f44c854SHong Zhang   ui[0] = 0;
26765f44c854SHong Zhang 
26775f44c854SHong Zhang   /* ICC(0) without matrix ordering: simply copies fill pattern */
26785f44c854SHong Zhang   if (!levels && perm_identity) {
26795f44c854SHong Zhang     for (i = 0; i < am; i++) {
26805f44c854SHong Zhang       ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
26815f44c854SHong Zhang       udiag[i]  = ui[i];
2682ed59401aSHong Zhang     }
26839566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
268439e3d630SHong Zhang     cols = uj;
2685ed59401aSHong Zhang     for (i = 0; i < am; i++) {
2686ed59401aSHong Zhang       aj    = a->j + a->diag[i];
268739e3d630SHong Zhang       ncols = ui[i + 1] - ui[i];
268839e3d630SHong Zhang       for (j = 0; j < ncols; j++) *cols++ = *aj++;
2689ed59401aSHong Zhang     }
269039e3d630SHong Zhang   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
26919566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(iperm, &riip));
26929566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(perm, &rip));
2693b635d36bSHong Zhang 
26947a48dd6fSHong Zhang     /* initialization */
26959566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(am + 1, &ajtmp));
26967a48dd6fSHong Zhang 
26977a48dd6fSHong Zhang     /* jl: linked list for storing indices of the pivot rows
26987a48dd6fSHong Zhang        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
26999566063dSJacob Faibussowitsch     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
27007a48dd6fSHong Zhang     for (i = 0; i < am; i++) {
27019371c9d4SSatish Balay       jl[i] = am;
27029371c9d4SSatish Balay       il[i] = 0;
27037a48dd6fSHong Zhang     }
27047a48dd6fSHong Zhang 
27057a48dd6fSHong Zhang     /* create and initialize a linked list for storing column indices of the active row k */
27067a48dd6fSHong Zhang     nlnk = am + 1;
27079566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
27087a48dd6fSHong Zhang 
27097a48dd6fSHong Zhang     /* initial FreeSpace size is fill*(ai[am]+1) */
27109566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
27117a48dd6fSHong Zhang     current_space = free_space;
27129566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl));
27137a48dd6fSHong Zhang     current_space_lvl = free_space_lvl;
27147a48dd6fSHong Zhang 
27157a48dd6fSHong Zhang     for (k = 0; k < am; k++) { /* for each active row k */
27167a48dd6fSHong Zhang       /* initialize lnk by the column indices of row rip[k] of A */
27177a48dd6fSHong Zhang       nzk   = 0;
2718622e2028SHong Zhang       ncols = ai[rip[k] + 1] - ai[rip[k]];
271928b400f6SJacob Faibussowitsch       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2720622e2028SHong Zhang       ncols_upper = 0;
2721622e2028SHong Zhang       for (j = 0; j < ncols; j++) {
2722b635d36bSHong Zhang         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2723b635d36bSHong Zhang         if (riip[i] >= k) {         /* only take upper triangular entry */
27245a8e39fbSHong Zhang           ajtmp[ncols_upper] = i;
2725622e2028SHong Zhang           ncols_upper++;
2726622e2028SHong Zhang         }
2727622e2028SHong Zhang       }
27289566063dSJacob Faibussowitsch       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
27297a48dd6fSHong Zhang       nzk += nlnk;
27307a48dd6fSHong Zhang 
27317a48dd6fSHong Zhang       /* update lnk by computing fill-in for each pivot row to be merged in */
27327a48dd6fSHong Zhang       prow = jl[k]; /* 1st pivot row */
27337a48dd6fSHong Zhang 
27347a48dd6fSHong Zhang       while (prow < k) {
27357a48dd6fSHong Zhang         nextprow = jl[prow];
27367a48dd6fSHong Zhang 
27377a48dd6fSHong Zhang         /* merge prow into k-th row */
27387a48dd6fSHong Zhang         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
27397a48dd6fSHong Zhang         jmax  = ui[prow + 1];
27407a48dd6fSHong Zhang         ncols = jmax - jmin;
2741ed59401aSHong Zhang         i     = jmin - ui[prow];
2742ed59401aSHong Zhang         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
27435a8e39fbSHong Zhang         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
27445a8e39fbSHong Zhang         j     = *(uj - 1);
27459566063dSJacob Faibussowitsch         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
27467a48dd6fSHong Zhang         nzk += nlnk;
27477a48dd6fSHong Zhang 
27487a48dd6fSHong Zhang         /* update il and jl for prow */
27497a48dd6fSHong Zhang         if (jmin < jmax) {
27507a48dd6fSHong Zhang           il[prow] = jmin;
27519371c9d4SSatish Balay           j        = *cols;
27529371c9d4SSatish Balay           jl[prow] = jl[j];
27539371c9d4SSatish Balay           jl[j]    = prow;
27547a48dd6fSHong Zhang         }
27557a48dd6fSHong Zhang         prow = nextprow;
27567a48dd6fSHong Zhang       }
27577a48dd6fSHong Zhang 
27587a48dd6fSHong Zhang       /* if free space is not available, make more free space */
27597a48dd6fSHong Zhang       if (current_space->local_remaining < nzk) {
27607a48dd6fSHong Zhang         i = am - k + 1;                                    /* num of unfactored rows */
2761f91af8c7SBarry Smith         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
27629566063dSJacob Faibussowitsch         PetscCall(PetscFreeSpaceGet(i, &current_space));
27639566063dSJacob Faibussowitsch         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
27647a48dd6fSHong Zhang         reallocs++;
27657a48dd6fSHong Zhang       }
27667a48dd6fSHong Zhang 
27677a48dd6fSHong Zhang       /* copy data into free_space and free_space_lvl, then initialize lnk */
276828b400f6SJacob Faibussowitsch       PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
27699566063dSJacob Faibussowitsch       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
27707a48dd6fSHong Zhang 
27717a48dd6fSHong Zhang       /* add the k-th row into il and jl */
277239e3d630SHong Zhang       if (nzk > 1) {
27737a48dd6fSHong Zhang         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
27749371c9d4SSatish Balay         jl[k] = jl[i];
27759371c9d4SSatish Balay         jl[i] = k;
27767a48dd6fSHong Zhang         il[k] = ui[k] + 1;
27777a48dd6fSHong Zhang       }
27787a48dd6fSHong Zhang       uj_ptr[k]     = current_space->array;
27797a48dd6fSHong Zhang       uj_lvl_ptr[k] = current_space_lvl->array;
27807a48dd6fSHong Zhang 
27817a48dd6fSHong Zhang       current_space->array += nzk;
27827a48dd6fSHong Zhang       current_space->local_used += nzk;
27837a48dd6fSHong Zhang       current_space->local_remaining -= nzk;
27847a48dd6fSHong Zhang 
27857a48dd6fSHong Zhang       current_space_lvl->array += nzk;
27867a48dd6fSHong Zhang       current_space_lvl->local_used += nzk;
27877a48dd6fSHong Zhang       current_space_lvl->local_remaining -= nzk;
27887a48dd6fSHong Zhang 
27897a48dd6fSHong Zhang       ui[k + 1] = ui[k] + nzk;
27907a48dd6fSHong Zhang     }
27917a48dd6fSHong Zhang 
27926cf91177SBarry Smith   #if defined(PETSC_USE_INFO)
27937a48dd6fSHong Zhang     if (ai[am] != 0) {
279439e3d630SHong Zhang       PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
27959566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
27969566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
27979566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
27987a48dd6fSHong Zhang     } else {
27999566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "Empty matrix\n"));
28007a48dd6fSHong Zhang     }
280163ba0a88SBarry Smith   #endif
28027a48dd6fSHong Zhang 
28039566063dSJacob Faibussowitsch     PetscCall(ISRestoreIndices(perm, &rip));
28049566063dSJacob Faibussowitsch     PetscCall(ISRestoreIndices(iperm, &riip));
28059566063dSJacob Faibussowitsch     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
28069566063dSJacob Faibussowitsch     PetscCall(PetscFree(ajtmp));
28077a48dd6fSHong Zhang 
28087a48dd6fSHong Zhang     /* destroy list of free space and other temporary array(s) */
28099f0612e4SBarry Smith     PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscInt),(void **)&uj));
28109566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
28119566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
28129566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
28137a48dd6fSHong Zhang 
281439e3d630SHong Zhang   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
281539e3d630SHong Zhang 
28167a48dd6fSHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
28177a48dd6fSHong Zhang 
2818f284f12aSHong Zhang   b               = (Mat_SeqSBAIJ *)fact->data;
28199f0612e4SBarry Smith   b->free_ij       = PETSC_TRUE;
28209f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscScalar),(void **)&b->a));
28219f0612e4SBarry Smith   b->free_a        = PETSC_TRUE;
28227a48dd6fSHong Zhang   b->j         = uj;
28237a48dd6fSHong Zhang   b->i         = ui;
28245f44c854SHong Zhang   b->diag      = udiag;
28257f53bb6cSHong Zhang   b->free_diag = PETSC_TRUE;
2826f4259b30SLisandro Dalcin   b->ilen      = NULL;
2827f4259b30SLisandro Dalcin   b->imax      = NULL;
28287a48dd6fSHong Zhang   b->row       = perm;
2829b635d36bSHong Zhang   b->col       = perm;
28302205254eSKarl Rupp 
28319566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
28329566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
28332205254eSKarl Rupp 
2834b635d36bSHong Zhang   b->icol          = iperm;
28357a48dd6fSHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
28369566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
28377a48dd6fSHong Zhang   b->maxnz = b->nz = ui[am];
28387a48dd6fSHong Zhang 
2839f284f12aSHong Zhang   fact->info.factor_mallocs   = reallocs;
2840f284f12aSHong Zhang   fact->info.fill_ratio_given = fill;
28417a48dd6fSHong Zhang   if (ai[am] != 0) {
2842f284f12aSHong Zhang     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
28437a48dd6fSHong Zhang   } else {
2844f284f12aSHong Zhang     fact->info.fill_ratio_needed = 0.0;
28457a48dd6fSHong Zhang   }
28460a3c351aSHong Zhang   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
28473ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2848a6175056SHong Zhang }
2849ff6a9541SJacob Faibussowitsch #endif
2850d5d45c9bSBarry Smith 
2851d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2852d71ae5a4SJacob Faibussowitsch {
28530be760fbSHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
28540be760fbSHong Zhang   Mat_SeqSBAIJ      *b;
28559186b105SHong Zhang   PetscBool          perm_identity, missing;
28560be760fbSHong Zhang   PetscReal          fill = info->fill;
28570be760fbSHong Zhang   const PetscInt    *rip, *riip;
28580be760fbSHong Zhang   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
28590be760fbSHong Zhang   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
28600be760fbSHong Zhang   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
28610298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
28620be760fbSHong Zhang   PetscBT            lnkbt;
28630be760fbSHong Zhang   IS                 iperm;
28640be760fbSHong Zhang 
28650be760fbSHong Zhang   PetscFunctionBegin;
286608401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
28679566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
286828b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
28699186b105SHong Zhang 
28700be760fbSHong Zhang   /* check whether perm is the identity mapping */
28719566063dSJacob Faibussowitsch   PetscCall(ISIdentity(perm, &perm_identity));
28729566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
28739566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iperm, &riip));
28749566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(perm, &rip));
28750be760fbSHong Zhang 
28760be760fbSHong Zhang   /* initialization */
28779f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
28789566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &udiag));
28790be760fbSHong Zhang   ui[0] = 0;
28800be760fbSHong Zhang 
28810be760fbSHong Zhang   /* jl: linked list for storing indices of the pivot rows
28820be760fbSHong Zhang      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
28839566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
28840be760fbSHong Zhang   for (i = 0; i < am; i++) {
28859371c9d4SSatish Balay     jl[i] = am;
28869371c9d4SSatish Balay     il[i] = 0;
28870be760fbSHong Zhang   }
28880be760fbSHong Zhang 
28890be760fbSHong Zhang   /* create and initialize a linked list for storing column indices of the active row k */
28900be760fbSHong Zhang   nlnk = am + 1;
28919566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
28920be760fbSHong Zhang 
289395b5ac22SHong Zhang   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
28949566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
28950be760fbSHong Zhang   current_space = free_space;
28960be760fbSHong Zhang 
28970be760fbSHong Zhang   for (k = 0; k < am; k++) { /* for each active row k */
28980be760fbSHong Zhang     /* initialize lnk by the column indices of row rip[k] of A */
28990be760fbSHong Zhang     nzk   = 0;
29000be760fbSHong Zhang     ncols = ai[rip[k] + 1] - ai[rip[k]];
290128b400f6SJacob Faibussowitsch     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
29020be760fbSHong Zhang     ncols_upper = 0;
29030be760fbSHong Zhang     for (j = 0; j < ncols; j++) {
29040be760fbSHong Zhang       i = riip[*(aj + ai[rip[k]] + j)];
29050be760fbSHong Zhang       if (i >= k) { /* only take upper triangular entry */
29060be760fbSHong Zhang         cols[ncols_upper] = i;
29070be760fbSHong Zhang         ncols_upper++;
29080be760fbSHong Zhang       }
29090be760fbSHong Zhang     }
29109566063dSJacob Faibussowitsch     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
29110be760fbSHong Zhang     nzk += nlnk;
29120be760fbSHong Zhang 
29130be760fbSHong Zhang     /* update lnk by computing fill-in for each pivot row to be merged in */
29140be760fbSHong Zhang     prow = jl[k]; /* 1st pivot row */
29150be760fbSHong Zhang 
29160be760fbSHong Zhang     while (prow < k) {
29170be760fbSHong Zhang       nextprow = jl[prow];
29180be760fbSHong Zhang       /* merge prow into k-th row */
29190be760fbSHong Zhang       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
29200be760fbSHong Zhang       jmax   = ui[prow + 1];
29210be760fbSHong Zhang       ncols  = jmax - jmin;
29220be760fbSHong Zhang       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
29239566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
29240be760fbSHong Zhang       nzk += nlnk;
29250be760fbSHong Zhang 
29260be760fbSHong Zhang       /* update il and jl for prow */
29270be760fbSHong Zhang       if (jmin < jmax) {
29280be760fbSHong Zhang         il[prow] = jmin;
29292205254eSKarl Rupp         j        = *uj_ptr;
29302205254eSKarl Rupp         jl[prow] = jl[j];
29312205254eSKarl Rupp         jl[j]    = prow;
29320be760fbSHong Zhang       }
29330be760fbSHong Zhang       prow = nextprow;
29340be760fbSHong Zhang     }
29350be760fbSHong Zhang 
29360be760fbSHong Zhang     /* if free space is not available, make more free space */
29370be760fbSHong Zhang     if (current_space->local_remaining < nzk) {
29380be760fbSHong Zhang       i = am - k + 1;                                    /* num of unfactored rows */
2939f91af8c7SBarry Smith       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
29409566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(i, &current_space));
29410be760fbSHong Zhang       reallocs++;
29420be760fbSHong Zhang     }
29430be760fbSHong Zhang 
29440be760fbSHong Zhang     /* copy data into free space, then initialize lnk */
29459566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
29460be760fbSHong Zhang 
29470be760fbSHong Zhang     /* add the k-th row into il and jl */
29487b056e98SHong Zhang     if (nzk > 1) {
29490be760fbSHong Zhang       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
29509371c9d4SSatish Balay       jl[k] = jl[i];
29519371c9d4SSatish Balay       jl[i] = k;
29520be760fbSHong Zhang       il[k] = ui[k] + 1;
29530be760fbSHong Zhang     }
29540be760fbSHong Zhang     ui_ptr[k] = current_space->array;
29552205254eSKarl Rupp 
29560be760fbSHong Zhang     current_space->array += nzk;
29570be760fbSHong Zhang     current_space->local_used += nzk;
29580be760fbSHong Zhang     current_space->local_remaining -= nzk;
29590be760fbSHong Zhang 
29600be760fbSHong Zhang     ui[k + 1] = ui[k] + nzk;
29610be760fbSHong Zhang   }
29620be760fbSHong Zhang 
29639566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(perm, &rip));
29649566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iperm, &riip));
29659566063dSJacob Faibussowitsch   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
29660be760fbSHong Zhang 
29679263d837SHong Zhang   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2968*84648c2dSPierre Jolivet   PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscInt), (void **)&uj));
29699566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
29709566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
29710be760fbSHong Zhang 
29720be760fbSHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
2973f284f12aSHong Zhang   b          = (Mat_SeqSBAIJ *)fact->data;
29740be760fbSHong Zhang   b->free_ij = PETSC_TRUE;
2975*84648c2dSPierre Jolivet   PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a));
29769f0612e4SBarry Smith   b->free_a    = PETSC_TRUE;
29770be760fbSHong Zhang   b->j         = uj;
29780be760fbSHong Zhang   b->i         = ui;
29790be760fbSHong Zhang   b->diag      = udiag;
29800be760fbSHong Zhang   b->free_diag = PETSC_TRUE;
2981f4259b30SLisandro Dalcin   b->ilen      = NULL;
2982f4259b30SLisandro Dalcin   b->imax      = NULL;
29830be760fbSHong Zhang   b->row       = perm;
29840be760fbSHong Zhang   b->col       = perm;
298526fbe8dcSKarl Rupp 
29869566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
29879566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
29882205254eSKarl Rupp 
29890be760fbSHong Zhang   b->icol          = iperm;
29900be760fbSHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
299126fbe8dcSKarl Rupp 
2992*84648c2dSPierre Jolivet   PetscCall(PetscMalloc1(am, &b->solve_work));
29932205254eSKarl Rupp 
29940be760fbSHong Zhang   b->maxnz = b->nz = ui[am];
29950be760fbSHong Zhang 
2996f284f12aSHong Zhang   fact->info.factor_mallocs   = reallocs;
2997f284f12aSHong Zhang   fact->info.fill_ratio_given = fill;
29980be760fbSHong Zhang   if (ai[am] != 0) {
29996a69fef8SHong Zhang     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
300095b5ac22SHong Zhang     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
30010be760fbSHong Zhang   } else {
3002f284f12aSHong Zhang     fact->info.fill_ratio_needed = 0.0;
30030be760fbSHong Zhang   }
30049263d837SHong Zhang #if defined(PETSC_USE_INFO)
30059263d837SHong Zhang   if (ai[am] != 0) {
30069263d837SHong Zhang     PetscReal af = fact->info.fill_ratio_needed;
30079566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
30089566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
30099566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
30109263d837SHong Zhang   } else {
30119566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
30129263d837SHong Zhang   }
30139263d837SHong Zhang #endif
301435233d99SShri Abhyankar   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
30153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
30160be760fbSHong Zhang }
30170be760fbSHong Zhang 
3018ff6a9541SJacob Faibussowitsch #if 0
3019ff6a9541SJacob Faibussowitsch // unused
3020ff6a9541SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
3021d71ae5a4SJacob Faibussowitsch {
3022f76d2b81SHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
302310c27e3dSHong Zhang   Mat_SeqSBAIJ      *b;
30249186b105SHong Zhang   PetscBool          perm_identity, missing;
302510c27e3dSHong Zhang   PetscReal          fill = info->fill;
30265d0c19d7SBarry Smith   const PetscInt    *rip, *riip;
30275d0c19d7SBarry Smith   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
302810c27e3dSHong Zhang   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
30292ed133dbSHong Zhang   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
30300298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
303110c27e3dSHong Zhang   PetscBT            lnkbt;
30322ed133dbSHong Zhang   IS                 iperm;
3033f76d2b81SHong Zhang 
3034f76d2b81SHong Zhang   PetscFunctionBegin;
303508401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
30369566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
303728b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
30389186b105SHong Zhang 
30392ed133dbSHong Zhang   /* check whether perm is the identity mapping */
30409566063dSJacob Faibussowitsch   PetscCall(ISIdentity(perm, &perm_identity));
30419566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
30429566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iperm, &riip));
30439566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(perm, &rip));
304410c27e3dSHong Zhang 
304510c27e3dSHong Zhang   /* initialization */
30469f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(am + 1,sizeof(PetscInt),(void **)&ui));
304710c27e3dSHong Zhang   ui[0] = 0;
304810c27e3dSHong Zhang 
304910c27e3dSHong Zhang   /* jl: linked list for storing indices of the pivot rows
30501393bc97SHong Zhang      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
30519566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
30521393bc97SHong Zhang   for (i = 0; i < am; i++) {
30539371c9d4SSatish Balay     jl[i] = am;
30549371c9d4SSatish Balay     il[i] = 0;
3055f76d2b81SHong Zhang   }
305610c27e3dSHong Zhang 
305710c27e3dSHong Zhang   /* create and initialize a linked list for storing column indices of the active row k */
30581393bc97SHong Zhang   nlnk = am + 1;
30599566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
306010c27e3dSHong Zhang 
30611393bc97SHong Zhang   /* initial FreeSpace size is fill*(ai[am]+1) */
30629566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
306310c27e3dSHong Zhang   current_space = free_space;
306410c27e3dSHong Zhang 
30651393bc97SHong Zhang   for (k = 0; k < am; k++) { /* for each active row k */
306610c27e3dSHong Zhang     /* initialize lnk by the column indices of row rip[k] of A */
306710c27e3dSHong Zhang     nzk   = 0;
30682ed133dbSHong Zhang     ncols = ai[rip[k] + 1] - ai[rip[k]];
306994bad497SJacob Faibussowitsch     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
30702ed133dbSHong Zhang     ncols_upper = 0;
30712ed133dbSHong Zhang     for (j = 0; j < ncols; j++) {
30729bfd6278SHong Zhang       i = riip[*(aj + ai[rip[k]] + j)];
30732ed133dbSHong Zhang       if (i >= k) { /* only take upper triangular entry */
30742ed133dbSHong Zhang         cols[ncols_upper] = i;
30752ed133dbSHong Zhang         ncols_upper++;
30762ed133dbSHong Zhang       }
30772ed133dbSHong Zhang     }
30789566063dSJacob Faibussowitsch     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
307910c27e3dSHong Zhang     nzk += nlnk;
308010c27e3dSHong Zhang 
308110c27e3dSHong Zhang     /* update lnk by computing fill-in for each pivot row to be merged in */
308210c27e3dSHong Zhang     prow = jl[k]; /* 1st pivot row */
308310c27e3dSHong Zhang 
308410c27e3dSHong Zhang     while (prow < k) {
308510c27e3dSHong Zhang       nextprow = jl[prow];
308610c27e3dSHong Zhang       /* merge prow into k-th row */
30871393bc97SHong Zhang       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
308810c27e3dSHong Zhang       jmax   = ui[prow + 1];
308910c27e3dSHong Zhang       ncols  = jmax - jmin;
30901393bc97SHong Zhang       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
30919566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
309210c27e3dSHong Zhang       nzk += nlnk;
309310c27e3dSHong Zhang 
309410c27e3dSHong Zhang       /* update il and jl for prow */
309510c27e3dSHong Zhang       if (jmin < jmax) {
309610c27e3dSHong Zhang         il[prow] = jmin;
30979371c9d4SSatish Balay         j        = *uj_ptr;
30989371c9d4SSatish Balay         jl[prow] = jl[j];
30999371c9d4SSatish Balay         jl[j]    = prow;
310010c27e3dSHong Zhang       }
310110c27e3dSHong Zhang       prow = nextprow;
310210c27e3dSHong Zhang     }
310310c27e3dSHong Zhang 
310410c27e3dSHong Zhang     /* if free space is not available, make more free space */
310510c27e3dSHong Zhang     if (current_space->local_remaining < nzk) {
31061393bc97SHong Zhang       i = am - k + 1;                     /* num of unfactored rows */
310710c27e3dSHong Zhang       i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
31089566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(i, &current_space));
310910c27e3dSHong Zhang       reallocs++;
311010c27e3dSHong Zhang     }
311110c27e3dSHong Zhang 
311210c27e3dSHong Zhang     /* copy data into free space, then initialize lnk */
31139566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
311410c27e3dSHong Zhang 
311510c27e3dSHong Zhang     /* add the k-th row into il and jl */
311610c27e3dSHong Zhang     if (nzk - 1 > 0) {
31171393bc97SHong Zhang       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
31189371c9d4SSatish Balay       jl[k] = jl[i];
31199371c9d4SSatish Balay       jl[i] = k;
312010c27e3dSHong Zhang       il[k] = ui[k] + 1;
312110c27e3dSHong Zhang     }
31222ed133dbSHong Zhang     ui_ptr[k] = current_space->array;
31232205254eSKarl Rupp 
312410c27e3dSHong Zhang     current_space->array += nzk;
312510c27e3dSHong Zhang     current_space->local_used += nzk;
312610c27e3dSHong Zhang     current_space->local_remaining -= nzk;
312710c27e3dSHong Zhang 
312810c27e3dSHong Zhang     ui[k + 1] = ui[k] + nzk;
312910c27e3dSHong Zhang   }
313010c27e3dSHong Zhang 
31316cf91177SBarry Smith   #if defined(PETSC_USE_INFO)
31321393bc97SHong Zhang   if (ai[am] != 0) {
31334ad8454bSPierre Jolivet     PetscReal af = (PetscReal)ui[am] / (PetscReal)ai[am];
31349566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
31359566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
31369566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
313710c27e3dSHong Zhang   } else {
31389566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
313910c27e3dSHong Zhang   }
314063ba0a88SBarry Smith   #endif
314110c27e3dSHong Zhang 
31429566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(perm, &rip));
31439566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iperm, &riip));
31449566063dSJacob Faibussowitsch   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
314510c27e3dSHong Zhang 
314610c27e3dSHong Zhang   /* destroy list of free space and other temporary array(s) */
31479f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscInt),(void **)&uj));
31489566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
31499566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
315010c27e3dSHong Zhang 
315110c27e3dSHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
3152f284f12aSHong Zhang   b               = (Mat_SeqSBAIJ *)fact->data;
3153e6b907acSBarry Smith   b->free_ij      = PETSC_TRUE;
31549f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscScalar),(void **)&b->a));
31559f0612e4SBarry Smith   b->free_a       = PETSC_TRUE;
315610c27e3dSHong Zhang   b->j    = uj;
315710c27e3dSHong Zhang   b->i    = ui;
3158f4259b30SLisandro Dalcin   b->diag = NULL;
3159f4259b30SLisandro Dalcin   b->ilen = NULL;
3160f4259b30SLisandro Dalcin   b->imax = NULL;
316110c27e3dSHong Zhang   b->row  = perm;
31629bfd6278SHong Zhang   b->col  = perm;
31632205254eSKarl Rupp 
31649566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
31659566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
31662205254eSKarl Rupp 
31679bfd6278SHong Zhang   b->icol          = iperm;
316810c27e3dSHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
31692205254eSKarl Rupp 
31709566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
31711393bc97SHong Zhang   b->maxnz = b->nz = ui[am];
317210c27e3dSHong Zhang 
3173f284f12aSHong Zhang   fact->info.factor_mallocs   = reallocs;
3174f284f12aSHong Zhang   fact->info.fill_ratio_given = fill;
31751393bc97SHong Zhang   if (ai[am] != 0) {
31764ad8454bSPierre Jolivet     fact->info.fill_ratio_needed = (PetscReal)ui[am] / (PetscReal)ai[am];
317710c27e3dSHong Zhang   } else {
3178f284f12aSHong Zhang     fact->info.fill_ratio_needed = 0.0;
317910c27e3dSHong Zhang   }
31800a3c351aSHong Zhang   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
31813ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3182f76d2b81SHong Zhang }
3183ff6a9541SJacob Faibussowitsch #endif
3184599ef60dSHong Zhang 
3185d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
3186d71ae5a4SJacob Faibussowitsch {
3187813a1f2bSShri Abhyankar   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
3188813a1f2bSShri Abhyankar   PetscInt           n  = A->rmap->n;
3189813a1f2bSShri Abhyankar   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3190813a1f2bSShri Abhyankar   PetscScalar       *x, sum;
3191813a1f2bSShri Abhyankar   const PetscScalar *b;
3192b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
3193813a1f2bSShri Abhyankar   PetscInt           i, nz;
3194813a1f2bSShri Abhyankar 
3195813a1f2bSShri Abhyankar   PetscFunctionBegin;
31963ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3197813a1f2bSShri Abhyankar 
3198b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
31999566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
32009566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
3201813a1f2bSShri Abhyankar 
3202813a1f2bSShri Abhyankar   /* forward solve the lower triangular */
3203813a1f2bSShri Abhyankar   x[0] = b[0];
3204813a1f2bSShri Abhyankar   v    = aa;
3205813a1f2bSShri Abhyankar   vi   = aj;
3206813a1f2bSShri Abhyankar   for (i = 1; i < n; i++) {
3207813a1f2bSShri Abhyankar     nz  = ai[i + 1] - ai[i];
3208813a1f2bSShri Abhyankar     sum = b[i];
3209813a1f2bSShri Abhyankar     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3210813a1f2bSShri Abhyankar     v += nz;
3211813a1f2bSShri Abhyankar     vi += nz;
3212813a1f2bSShri Abhyankar     x[i] = sum;
3213813a1f2bSShri Abhyankar   }
3214813a1f2bSShri Abhyankar 
3215813a1f2bSShri Abhyankar   /* backward solve the upper triangular */
321662fbd6afSShri Abhyankar   for (i = n - 1; i >= 0; i--) {
32173c0119dfSShri Abhyankar     v   = aa + adiag[i + 1] + 1;
32183c0119dfSShri Abhyankar     vi  = aj + adiag[i + 1] + 1;
3219813a1f2bSShri Abhyankar     nz  = adiag[i] - adiag[i + 1] - 1;
3220813a1f2bSShri Abhyankar     sum = x[i];
3221813a1f2bSShri Abhyankar     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
32223c0119dfSShri Abhyankar     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3223813a1f2bSShri Abhyankar   }
3224813a1f2bSShri Abhyankar 
32259566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3226b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
32279566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
32289566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
32293ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3230813a1f2bSShri Abhyankar }
3231813a1f2bSShri Abhyankar 
3232d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
3233d71ae5a4SJacob Faibussowitsch {
3234f268cba8SShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
3235f268cba8SShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
3236f268cba8SShri Abhyankar   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3237f268cba8SShri Abhyankar   const PetscInt    *rout, *cout, *r, *c;
3238f268cba8SShri Abhyankar   PetscScalar       *x, *tmp, sum;
3239f268cba8SShri Abhyankar   const PetscScalar *b;
3240b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
3241f268cba8SShri Abhyankar 
3242f268cba8SShri Abhyankar   PetscFunctionBegin;
32433ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3244f268cba8SShri Abhyankar 
3245b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
32469566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
32479566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
3248f268cba8SShri Abhyankar   tmp = a->solve_work;
3249f268cba8SShri Abhyankar 
32509371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
32519371c9d4SSatish Balay   r = rout;
32529371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
32539371c9d4SSatish Balay   c = cout;
3254f268cba8SShri Abhyankar 
3255f268cba8SShri Abhyankar   /* forward solve the lower triangular */
3256f268cba8SShri Abhyankar   tmp[0] = b[r[0]];
3257f268cba8SShri Abhyankar   v      = aa;
3258f268cba8SShri Abhyankar   vi     = aj;
3259f268cba8SShri Abhyankar   for (i = 1; i < n; i++) {
3260f268cba8SShri Abhyankar     nz  = ai[i + 1] - ai[i];
3261f268cba8SShri Abhyankar     sum = b[r[i]];
3262f268cba8SShri Abhyankar     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3263f268cba8SShri Abhyankar     tmp[i] = sum;
32649371c9d4SSatish Balay     v += nz;
32659371c9d4SSatish Balay     vi += nz;
3266f268cba8SShri Abhyankar   }
3267f268cba8SShri Abhyankar 
3268f268cba8SShri Abhyankar   /* backward solve the upper triangular */
3269f268cba8SShri Abhyankar   for (i = n - 1; i >= 0; i--) {
32703c0119dfSShri Abhyankar     v   = aa + adiag[i + 1] + 1;
32713c0119dfSShri Abhyankar     vi  = aj + adiag[i + 1] + 1;
3272f268cba8SShri Abhyankar     nz  = adiag[i] - adiag[i + 1] - 1;
3273f268cba8SShri Abhyankar     sum = tmp[i];
3274f268cba8SShri Abhyankar     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3275f268cba8SShri Abhyankar     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3276f268cba8SShri Abhyankar   }
3277f268cba8SShri Abhyankar 
32789566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
32799566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
3280b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
32819566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
32829566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
32839566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
32843ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3285f268cba8SShri Abhyankar }
3286f268cba8SShri Abhyankar 
3287ff6a9541SJacob Faibussowitsch #if 0
3288ff6a9541SJacob Faibussowitsch // unused
3289fe97e370SBarry Smith /*
32906aad120cSJose E. Roman     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3291fe97e370SBarry Smith */
3292ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact)
3293d71ae5a4SJacob Faibussowitsch {
32941d098868SHong Zhang   Mat             B = *fact;
3295599ef60dSHong Zhang   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b;
3296599ef60dSHong Zhang   IS              isicol;
32971d098868SHong Zhang   const PetscInt *r, *ic;
32981d098868SHong Zhang   PetscInt        i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3299dc3a2fd3SHong Zhang   PetscInt       *bi, *bj, *bdiag, *bdiag_rev;
3300f61a948aSLisandro Dalcin   PetscInt        row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
33011d098868SHong Zhang   PetscInt        nlnk, *lnk;
33021d098868SHong Zhang   PetscBT         lnkbt;
3303ace3abfcSBarry Smith   PetscBool       row_identity, icol_identity;
33048aee2decSHong Zhang   MatScalar      *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
33051d098868SHong Zhang   const PetscInt *ics;
33061d098868SHong Zhang   PetscInt        j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3307a2ea699eSBarry Smith   PetscReal       dt = info->dt, shift = info->shiftamount;
3308f61a948aSLisandro Dalcin   PetscInt        dtcount = (PetscInt)info->dtcount, nnz_max;
3309ace3abfcSBarry Smith   PetscBool       missing;
3310599ef60dSHong Zhang 
3311599ef60dSHong Zhang   PetscFunctionBegin;
331213bcc0bdSJacob Faibussowitsch   if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005;
3313f61a948aSLisandro Dalcin   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);
3314f61a948aSLisandro Dalcin 
33152ef1f0ffSBarry Smith   /* symbolic factorization, can be reused */
33169566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
331728b400f6SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
33181d098868SHong Zhang   adiag = a->diag;
3319599ef60dSHong Zhang 
33209566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
3321599ef60dSHong Zhang 
3322599ef60dSHong Zhang   /* bdiag is location of diagonal in factor */
33239566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));     /* becomes b->diag */
33249566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */
3325599ef60dSHong Zhang 
33261d098868SHong Zhang   /* allocate row pointers bi */
33279f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(2 * n + 2,sizeof(PetscInt),(void **)&bi));
33281d098868SHong Zhang 
33291d098868SHong Zhang   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3330393d3a68SHong Zhang   if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
33311d098868SHong Zhang   nnz_max = ai[n] + 2 * n * dtcount + 2;
33328fc3a347SHong Zhang 
33339f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(nnz_max + 1,sizeof(PetscInt),(void **)&bj));
33349f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(nnz_max + 1,sizeof(PetscScalar),(void **)&ba));
3335599ef60dSHong Zhang 
3336599ef60dSHong Zhang   /* put together the new matrix */
33379566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3338f284f12aSHong Zhang   b = (Mat_SeqAIJ *)B->data;
3339599ef60dSHong Zhang   b->free_a       = PETSC_TRUE;
3340599ef60dSHong Zhang   b->free_ij      = PETSC_TRUE;
3341599ef60dSHong Zhang   b->a    = ba;
3342599ef60dSHong Zhang   b->j    = bj;
3343599ef60dSHong Zhang   b->i    = bi;
3344599ef60dSHong Zhang   b->diag = bdiag;
3345f4259b30SLisandro Dalcin   b->ilen = NULL;
3346f4259b30SLisandro Dalcin   b->imax = NULL;
3347599ef60dSHong Zhang   b->row  = isrow;
3348599ef60dSHong Zhang   b->col  = iscol;
33499566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
33509566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
3351599ef60dSHong Zhang   b->icol = isicol;
3352599ef60dSHong Zhang 
33539566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
33541d098868SHong Zhang   b->maxnz = nnz_max;
3355599ef60dSHong Zhang 
3356d5f3da31SBarry Smith   B->factortype            = MAT_FACTOR_ILUDT;
3357f284f12aSHong Zhang   B->info.factor_mallocs   = 0;
3358f284f12aSHong Zhang   B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
33592ef1f0ffSBarry Smith   /*  end of symbolic factorization */
3360599ef60dSHong Zhang 
33619566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
33629566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
3363599ef60dSHong Zhang   ics = ic;
3364599ef60dSHong Zhang 
3365599ef60dSHong Zhang   /* linked list for storing column indices of the active row */
3366599ef60dSHong Zhang   nlnk = n + 1;
33679566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
33681d098868SHong Zhang 
33691d098868SHong Zhang   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
33709566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n, &im, n, &jtmp));
33711d098868SHong Zhang   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
33729566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp));
33739566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(rtmp, n));
3374599ef60dSHong Zhang 
3375599ef60dSHong Zhang   bi[0]         = 0;
33768fc3a347SHong Zhang   bdiag[0]      = nnz_max - 1; /* location of diag[0] in factor B */
3377dc3a2fd3SHong Zhang   bdiag_rev[n]  = bdiag[0];
33788fc3a347SHong Zhang   bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3379599ef60dSHong Zhang   for (i = 0; i < n; i++) {
3380599ef60dSHong Zhang     /* copy initial fill into linked list */
33818369b28dSHong Zhang     nzi = ai[r[i] + 1] - ai[r[i]];
338294bad497SJacob Faibussowitsch     PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
33838369b28dSHong Zhang     nzi_al = adiag[r[i]] - ai[r[i]];
33848369b28dSHong Zhang     nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3385599ef60dSHong Zhang     ajtmp  = aj + ai[r[i]];
33869566063dSJacob Faibussowitsch     PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt));
3387599ef60dSHong Zhang 
3388599ef60dSHong Zhang     /* load in initial (unfactored row) */
33891d098868SHong Zhang     aatmp = a->a + ai[r[i]];
3390ad540459SPierre Jolivet     for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++;
3391599ef60dSHong Zhang 
3392599ef60dSHong Zhang     /* add pivot rows into linked list */
3393599ef60dSHong Zhang     row = lnk[n];
3394599ef60dSHong Zhang     while (row < i) {
33951d098868SHong Zhang       nzi_bl = bi[row + 1] - bi[row] + 1;
33961d098868SHong Zhang       bjtmp  = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
33979566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im));
3398599ef60dSHong Zhang       nzi += nlnk;
3399599ef60dSHong Zhang       row = lnk[row];
3400599ef60dSHong Zhang     }
3401599ef60dSHong Zhang 
34028369b28dSHong Zhang     /* copy data from lnk into jtmp, then initialize lnk */
34039566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt));
3404599ef60dSHong Zhang 
3405599ef60dSHong Zhang     /* numerical factorization */
34068369b28dSHong Zhang     bjtmp = jtmp;
3407599ef60dSHong Zhang     row   = *bjtmp++; /* 1st pivot row */
3408599ef60dSHong Zhang     while (row < i) {
3409599ef60dSHong Zhang       pc         = rtmp + row;
34103c2a7987SHong Zhang       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
34113c2a7987SHong Zhang       multiplier = (*pc) * (*pv);
3412599ef60dSHong Zhang       *pc        = multiplier;
34133c2a7987SHong Zhang       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
34141d098868SHong Zhang         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
34151d098868SHong Zhang         pv = ba + bdiag[row + 1] + 1;
34161d098868SHong Zhang         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
34171d098868SHong Zhang         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
34189566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3419599ef60dSHong Zhang       }
3420599ef60dSHong Zhang       row = *bjtmp++;
3421599ef60dSHong Zhang     }
3422599ef60dSHong Zhang 
34238369b28dSHong Zhang     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
34248aee2decSHong Zhang     diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
34259371c9d4SSatish Balay     nzi_bl   = 0;
34269371c9d4SSatish Balay     j        = 0;
34278369b28dSHong Zhang     while (jtmp[j] < i) { /* Note: jtmp is sorted */
34289371c9d4SSatish Balay       vtmp[j]       = rtmp[jtmp[j]];
34299371c9d4SSatish Balay       rtmp[jtmp[j]] = 0.0;
34309371c9d4SSatish Balay       nzi_bl++;
34319371c9d4SSatish Balay       j++;
34328369b28dSHong Zhang     }
34338369b28dSHong Zhang     nzi_bu = nzi - nzi_bl - 1;
34348369b28dSHong Zhang     while (j < nzi) {
34359371c9d4SSatish Balay       vtmp[j]       = rtmp[jtmp[j]];
34369371c9d4SSatish Balay       rtmp[jtmp[j]] = 0.0;
34378369b28dSHong Zhang       j++;
34388369b28dSHong Zhang     }
34398369b28dSHong Zhang 
3440599ef60dSHong Zhang     bjtmp = bj + bi[i];
3441599ef60dSHong Zhang     batmp = ba + bi[i];
34428369b28dSHong Zhang     /* apply level dropping rule to L part */
34438369b28dSHong Zhang     ncut = nzi_al + dtcount;
34448369b28dSHong Zhang     if (ncut < nzi_bl) {
34459566063dSJacob Faibussowitsch       PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp));
34469566063dSJacob Faibussowitsch       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp));
3447599ef60dSHong Zhang     } else {
34488369b28dSHong Zhang       ncut = nzi_bl;
34498369b28dSHong Zhang     }
34508369b28dSHong Zhang     for (j = 0; j < ncut; j++) {
34518369b28dSHong Zhang       bjtmp[j] = jtmp[j];
34528369b28dSHong Zhang       batmp[j] = vtmp[j];
34538369b28dSHong Zhang     }
34541d098868SHong Zhang     bi[i + 1] = bi[i] + ncut;
34558369b28dSHong Zhang     nzi       = ncut + 1;
34568369b28dSHong Zhang 
34578369b28dSHong Zhang     /* apply level dropping rule to U part */
34588369b28dSHong Zhang     ncut = nzi_au + dtcount;
34598369b28dSHong Zhang     if (ncut < nzi_bu) {
34609566063dSJacob Faibussowitsch       PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1));
34619566063dSJacob Faibussowitsch       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1));
34628369b28dSHong Zhang     } else {
34638369b28dSHong Zhang       ncut = nzi_bu;
34648369b28dSHong Zhang     }
34658369b28dSHong Zhang     nzi += ncut;
34661d098868SHong Zhang 
34671d098868SHong Zhang     /* mark bdiagonal */
34681d098868SHong Zhang     bdiag[i + 1]         = bdiag[i] - (ncut + 1);
3469dc3a2fd3SHong Zhang     bdiag_rev[n - i - 1] = bdiag[i + 1];
34708fc3a347SHong Zhang     bi[2 * n - i]        = bi[2 * n - i + 1] - (ncut + 1);
34711d098868SHong Zhang     bjtmp                = bj + bdiag[i];
34721d098868SHong Zhang     batmp                = ba + bdiag[i];
34731d098868SHong Zhang     *bjtmp               = i;
34748aee2decSHong Zhang     *batmp               = diag_tmp; /* rtmp[i]; */
3475ad540459SPierre Jolivet     if (*batmp == 0.0) *batmp = dt + shift;
3476a5b23f4aSJose E. Roman     *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */
34771d098868SHong Zhang 
34781d098868SHong Zhang     bjtmp = bj + bdiag[i + 1] + 1;
34791d098868SHong Zhang     batmp = ba + bdiag[i + 1] + 1;
34808369b28dSHong Zhang     for (k = 0; k < ncut; k++) {
34811d098868SHong Zhang       bjtmp[k] = jtmp[nzi_bl + 1 + k];
34821d098868SHong Zhang       batmp[k] = vtmp[nzi_bl + 1 + k];
3483599ef60dSHong Zhang     }
3484599ef60dSHong Zhang 
3485599ef60dSHong Zhang     im[i] = nzi; /* used by PetscLLAddSortedLU() */
34868369b28dSHong Zhang   }              /* for (i=0; i<n; i++) */
348763a3b9bcSJacob Faibussowitsch   PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]);
3488599ef60dSHong Zhang 
34899566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
34909566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
3491599ef60dSHong Zhang 
34929566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
34939566063dSJacob Faibussowitsch   PetscCall(PetscFree2(im, jtmp));
34949566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rtmp, vtmp));
34959566063dSJacob Faibussowitsch   PetscCall(PetscFree(bdiag_rev));
3496599ef60dSHong Zhang 
34979566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(B->cmap->n));
34981d098868SHong Zhang   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3499599ef60dSHong Zhang 
35009566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
35019566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isicol, &icol_identity));
3502599ef60dSHong Zhang   if (row_identity && icol_identity) {
350335233d99SShri Abhyankar     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3504599ef60dSHong Zhang   } else {
350535233d99SShri Abhyankar     B->ops->solve = MatSolve_SeqAIJ;
3506599ef60dSHong Zhang   }
3507599ef60dSHong Zhang 
3508f4259b30SLisandro Dalcin   B->ops->solveadd          = NULL;
3509f4259b30SLisandro Dalcin   B->ops->solvetranspose    = NULL;
3510f4259b30SLisandro Dalcin   B->ops->solvetransposeadd = NULL;
3511f4259b30SLisandro Dalcin   B->ops->matsolve          = NULL;
3512599ef60dSHong Zhang   B->assembled              = PETSC_TRUE;
3513599ef60dSHong Zhang   B->preallocated           = PETSC_TRUE;
35143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3515599ef60dSHong Zhang }
3516599ef60dSHong Zhang 
3517da81f932SPierre Jolivet /* a wrapper of MatILUDTFactor_SeqAIJ() */
3518fe97e370SBarry Smith /*
35196aad120cSJose E. Roman     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3520fe97e370SBarry Smith */
3521fe97e370SBarry Smith 
3522ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
3523d71ae5a4SJacob Faibussowitsch {
35243c2a7987SHong Zhang   PetscFunctionBegin;
35259566063dSJacob Faibussowitsch   PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact));
35263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
35273c2a7987SHong Zhang }
35283c2a7987SHong Zhang 
35293c2a7987SHong Zhang /*
35303c2a7987SHong Zhang    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
35313c2a7987SHong Zhang    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
35323c2a7987SHong Zhang */
3533fe97e370SBarry Smith /*
35346aad120cSJose E. Roman     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3535fe97e370SBarry Smith */
3536fe97e370SBarry Smith 
3537ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info)
3538d71ae5a4SJacob Faibussowitsch {
35393c2a7987SHong Zhang   Mat             C = fact;
35403c2a7987SHong Zhang   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
35413c2a7987SHong Zhang   IS              isrow = b->row, isicol = b->icol;
35423c2a7987SHong Zhang   const PetscInt *r, *ic, *ics;
3543b78a477dSHong Zhang   PetscInt        i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3544b78a477dSHong Zhang   PetscInt       *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
35453c2a7987SHong Zhang   MatScalar      *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3546182b8fbaSHong Zhang   PetscReal       dt = info->dt, shift = info->shiftamount;
3547ace3abfcSBarry Smith   PetscBool       row_identity, col_identity;
35483c2a7987SHong Zhang 
35493c2a7987SHong Zhang   PetscFunctionBegin;
35509566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
35519566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
35529566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &rtmp));
35533c2a7987SHong Zhang   ics = ic;
35543c2a7987SHong Zhang 
35553c2a7987SHong Zhang   for (i = 0; i < n; i++) {
3556b78a477dSHong Zhang     /* initialize rtmp array */
3557bbea24aaSStefano Zampini     nzl   = bi[i + 1] - bi[i]; /* num of nonzeros in L(i,:) */
35583c2a7987SHong Zhang     bjtmp = bj + bi[i];
3559b78a477dSHong Zhang     for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3560b78a477dSHong Zhang     rtmp[i] = 0.0;
3561bbea24aaSStefano Zampini     nzu     = bdiag[i] - bdiag[i + 1]; /* num of nonzeros in U(i,:) */
3562b78a477dSHong Zhang     bjtmp   = bj + bdiag[i + 1] + 1;
3563b78a477dSHong Zhang     for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;
35643c2a7987SHong Zhang 
3565b78a477dSHong Zhang     /* load in initial unfactored row of A */
35663c2a7987SHong Zhang     nz    = ai[r[i] + 1] - ai[r[i]];
35673c2a7987SHong Zhang     ajtmp = aj + ai[r[i]];
35683c2a7987SHong Zhang     v     = aa + ai[r[i]];
3569ad540459SPierre Jolivet     for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j];
35703c2a7987SHong Zhang 
3571b78a477dSHong Zhang     /* numerical factorization */
3572b78a477dSHong Zhang     bjtmp = bj + bi[i];        /* point to 1st entry of L(i,:) */
3573b78a477dSHong Zhang     nzl   = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3574b78a477dSHong Zhang     k     = 0;
3575b78a477dSHong Zhang     while (k < nzl) {
35763c2a7987SHong Zhang       row        = *bjtmp++;
35773c2a7987SHong Zhang       pc         = rtmp + row;
3578b78a477dSHong Zhang       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3579b78a477dSHong Zhang       multiplier = (*pc) * (*pv);
35803c2a7987SHong Zhang       *pc        = multiplier;
3581b78a477dSHong Zhang       if (PetscAbsScalar(multiplier) > dt) {
3582b78a477dSHong Zhang         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3583b78a477dSHong Zhang         pv = b->a + bdiag[row + 1] + 1;
3584b78a477dSHong Zhang         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3585b78a477dSHong Zhang         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
35869566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(1 + 2.0 * nz));
35873c2a7987SHong Zhang       }
3588b78a477dSHong Zhang       k++;
35893c2a7987SHong Zhang     }
35903c2a7987SHong Zhang 
3591b78a477dSHong Zhang     /* finished row so stick it into b->a */
3592b78a477dSHong Zhang     /* L-part */
3593b78a477dSHong Zhang     pv  = b->a + bi[i];
3594b78a477dSHong Zhang     pj  = bj + bi[i];
3595b78a477dSHong Zhang     nzl = bi[i + 1] - bi[i];
3596ad540459SPierre Jolivet     for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]];
3597b78a477dSHong Zhang 
3598a5b23f4aSJose E. Roman     /* diagonal: invert diagonal entries for simpler triangular solves */
3599b78a477dSHong Zhang     if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3600b78a477dSHong Zhang     b->a[bdiag[i]] = 1.0 / rtmp[i];
3601b78a477dSHong Zhang 
3602b78a477dSHong Zhang     /* U-part */
3603b78a477dSHong Zhang     pv  = b->a + bdiag[i + 1] + 1;
3604b78a477dSHong Zhang     pj  = bj + bdiag[i + 1] + 1;
3605b78a477dSHong Zhang     nzu = bdiag[i] - bdiag[i + 1] - 1;
3606ad540459SPierre Jolivet     for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]];
3607b78a477dSHong Zhang   }
3608b78a477dSHong Zhang 
36099566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
36109566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
36119566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
3612b78a477dSHong Zhang 
36139566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
36149566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isicol, &col_identity));
36153c2a7987SHong Zhang   if (row_identity && col_identity) {
361635233d99SShri Abhyankar     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
36173c2a7987SHong Zhang   } else {
361835233d99SShri Abhyankar     C->ops->solve = MatSolve_SeqAIJ;
36193c2a7987SHong Zhang   }
3620f4259b30SLisandro Dalcin   C->ops->solveadd          = NULL;
3621f4259b30SLisandro Dalcin   C->ops->solvetranspose    = NULL;
3622f4259b30SLisandro Dalcin   C->ops->solvetransposeadd = NULL;
3623f4259b30SLisandro Dalcin   C->ops->matsolve          = NULL;
36243c2a7987SHong Zhang   C->assembled              = PETSC_TRUE;
36253c2a7987SHong Zhang   C->preallocated           = PETSC_TRUE;
36262205254eSKarl Rupp 
36279566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->cmap->n));
36283ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
36293c2a7987SHong Zhang }
3630ff6a9541SJacob Faibussowitsch #endif
3631