xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision e0f5bfbec699682fa3e8b8532b1176849ea4e12a)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscbt.h>
5 #include <../src/mat/utils/freespace.h>
6 
7 /*
8       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
9 
10       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
11 */
12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol) {
13   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)mat->data;
14   PetscInt           i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
15   const PetscInt    *ai = a->i, *aj = a->j;
16   const PetscScalar *aa = a->a;
17   PetscBool         *done;
18   PetscReal          best, past = 0, future;
19 
20   PetscFunctionBegin;
21   /* pick initial row */
22   best = -1;
23   for (i = 0; i < n; i++) {
24     future = 0.0;
25     for (j = ai[i]; j < ai[i + 1]; j++) {
26       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
27       else past = PetscAbsScalar(aa[j]);
28     }
29     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
30     if (past / future > best) {
31       best    = past / future;
32       current = i;
33     }
34   }
35 
36   PetscCall(PetscMalloc1(n, &done));
37   PetscCall(PetscArrayzero(done, n));
38   PetscCall(PetscMalloc1(n, &order));
39   order[0] = current;
40   for (i = 0; i < n - 1; i++) {
41     done[current] = PETSC_TRUE;
42     best          = -1;
43     /* loop over all neighbors of current pivot */
44     for (j = ai[current]; j < ai[current + 1]; j++) {
45       jj = aj[j];
46       if (done[jj]) continue;
47       /* loop over columns of potential next row computing weights for below and above diagonal */
48       past = future = 0.0;
49       for (k = ai[jj]; k < ai[jj + 1]; k++) {
50         kk = aj[k];
51         if (done[kk]) past += PetscAbsScalar(aa[k]);
52         else if (kk != jj) future += PetscAbsScalar(aa[k]);
53       }
54       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
55       if (past / future > best) {
56         best       = past / future;
57         newcurrent = jj;
58       }
59     }
60     if (best == -1) { /* no neighbors to select from so select best of all that remain */
61       best = -1;
62       for (k = 0; k < n; k++) {
63         if (done[k]) continue;
64         future = 0.0;
65         past   = 0.0;
66         for (j = ai[k]; j < ai[k + 1]; j++) {
67           kk = aj[j];
68           if (done[kk]) past += PetscAbsScalar(aa[j]);
69           else if (kk != k) future += PetscAbsScalar(aa[j]);
70         }
71         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
72         if (past / future > best) {
73           best       = past / future;
74           newcurrent = k;
75         }
76       }
77     }
78     PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current");
79     current      = newcurrent;
80     order[i + 1] = current;
81   }
82   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow));
83   *icol = *irow;
84   PetscCall(PetscObjectReference((PetscObject)*irow));
85   PetscCall(PetscFree(done));
86   PetscCall(PetscFree(order));
87   PetscFunctionReturn(0);
88 }
89 
90 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type) {
91   PetscFunctionBegin;
92   *type = MATSOLVERPETSC;
93   PetscFunctionReturn(0);
94 }
95 
96 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B) {
97   PetscInt n = A->rmap->n;
98 
99   PetscFunctionBegin;
100 #if defined(PETSC_USE_COMPLEX)
101   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || (ftype != MAT_FACTOR_CHOLESKY && ftype != MAT_FACTOR_ICC), PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY or ICC Factor is not supported");
102 #endif
103   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
104   PetscCall(MatSetSizes(*B, n, n, n, n));
105   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
106     PetscCall(MatSetType(*B, MATSEQAIJ));
107 
108     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
109     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
110 
111     PetscCall(MatSetBlockSizesFromMats(*B, A, A));
112     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
113     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
114     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
115   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
116     PetscCall(MatSetType(*B, MATSEQSBAIJ));
117     PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
118 
119     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
120     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
121     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
122     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
123   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
124   (*B)->factortype = ftype;
125 
126   PetscCall(PetscFree((*B)->solvertype));
127   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
128   (*B)->canuseordering = PETSC_TRUE;
129   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
130   PetscFunctionReturn(0);
131 }
132 
133 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) {
134   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
135   IS                 isicol;
136   const PetscInt    *r, *ic;
137   PetscInt           i, n = A->rmap->n, *ai = a->i, *aj = a->j;
138   PetscInt          *bi, *bj, *ajtmp;
139   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
140   PetscReal          f;
141   PetscInt           nlnk, *lnk, k, **bi_ptr;
142   PetscFreeSpaceList free_space = NULL, current_space = NULL;
143   PetscBT            lnkbt;
144   PetscBool          missing;
145 
146   PetscFunctionBegin;
147   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
148   PetscCall(MatMissingDiagonal(A, &missing, &i));
149   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
150 
151   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
152   PetscCall(ISGetIndices(isrow, &r));
153   PetscCall(ISGetIndices(isicol, &ic));
154 
155   /* get new row pointers */
156   PetscCall(PetscMalloc1(n + 1, &bi));
157   bi[0] = 0;
158 
159   /* bdiag is location of diagonal in factor */
160   PetscCall(PetscMalloc1(n + 1, &bdiag));
161   bdiag[0] = 0;
162 
163   /* linked list for storing column indices of the active row */
164   nlnk = n + 1;
165   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
166 
167   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
168 
169   /* initial FreeSpace size is f*(ai[n]+1) */
170   f = info->fill;
171   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
172   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
173   current_space = free_space;
174 
175   for (i = 0; i < n; i++) {
176     /* copy previous fill into linked list */
177     nzi   = 0;
178     nnz   = ai[r[i] + 1] - ai[r[i]];
179     ajtmp = aj + ai[r[i]];
180     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
181     nzi += nlnk;
182 
183     /* add pivot rows into linked list */
184     row = lnk[n];
185     while (row < i) {
186       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
187       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
188       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
189       nzi += nlnk;
190       row = lnk[row];
191     }
192     bi[i + 1] = bi[i] + nzi;
193     im[i]     = nzi;
194 
195     /* mark bdiag */
196     nzbd = 0;
197     nnz  = nzi;
198     k    = lnk[n];
199     while (nnz-- && k < i) {
200       nzbd++;
201       k = lnk[k];
202     }
203     bdiag[i] = bi[i] + nzbd;
204 
205     /* if free space is not available, make more free space */
206     if (current_space->local_remaining < nzi) {
207       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
208       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
209       reallocs++;
210     }
211 
212     /* copy data into free space, then initialize lnk */
213     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
214 
215     bi_ptr[i] = current_space->array;
216     current_space->array += nzi;
217     current_space->local_used += nzi;
218     current_space->local_remaining -= nzi;
219   }
220 #if defined(PETSC_USE_INFO)
221   if (ai[n] != 0) {
222     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
223     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
224     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
225     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
226     PetscCall(PetscInfo(A, "for best performance.\n"));
227   } else {
228     PetscCall(PetscInfo(A, "Empty matrix\n"));
229   }
230 #endif
231 
232   PetscCall(ISRestoreIndices(isrow, &r));
233   PetscCall(ISRestoreIndices(isicol, &ic));
234 
235   /* destroy list of free space and other temporary array(s) */
236   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
237   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
238   PetscCall(PetscLLDestroy(lnk, lnkbt));
239   PetscCall(PetscFree2(bi_ptr, im));
240 
241   /* put together the new matrix */
242   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
243   b = (Mat_SeqAIJ *)(B)->data;
244 
245   b->free_a       = PETSC_TRUE;
246   b->free_ij      = PETSC_TRUE;
247   b->singlemalloc = PETSC_FALSE;
248 
249   PetscCall(PetscMalloc1(bi[n] + 1, &b->a));
250   b->j    = bj;
251   b->i    = bi;
252   b->diag = bdiag;
253   b->ilen = NULL;
254   b->imax = NULL;
255   b->row  = isrow;
256   b->col  = iscol;
257   PetscCall(PetscObjectReference((PetscObject)isrow));
258   PetscCall(PetscObjectReference((PetscObject)iscol));
259   b->icol = isicol;
260   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
261 
262   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
263   b->maxnz = b->nz = bi[n];
264 
265   (B)->factortype            = MAT_FACTOR_LU;
266   (B)->info.factor_mallocs   = reallocs;
267   (B)->info.fill_ratio_given = f;
268 
269   if (ai[n]) {
270     (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
271   } else {
272     (B)->info.fill_ratio_needed = 0.0;
273   }
274   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
275   if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
276   PetscFunctionReturn(0);
277 }
278 
279 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) {
280   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
281   IS                 isicol;
282   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
283   PetscInt           i, n = A->rmap->n;
284   PetscInt          *bi, *bj;
285   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
286   PetscReal          f;
287   PetscInt           nlnk, *lnk, k, **bi_ptr;
288   PetscFreeSpaceList free_space = NULL, current_space = NULL;
289   PetscBT            lnkbt;
290   PetscBool          missing;
291 
292   PetscFunctionBegin;
293   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
294   PetscCall(MatMissingDiagonal(A, &missing, &i));
295   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
296 
297   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
298   PetscCall(ISGetIndices(isrow, &r));
299   PetscCall(ISGetIndices(isicol, &ic));
300 
301   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
302   PetscCall(PetscMalloc1(n + 1, &bi));
303   PetscCall(PetscMalloc1(n + 1, &bdiag));
304   bi[0] = bdiag[0] = 0;
305 
306   /* linked list for storing column indices of the active row */
307   nlnk = n + 1;
308   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
309 
310   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
311 
312   /* initial FreeSpace size is f*(ai[n]+1) */
313   f = info->fill;
314   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
315   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
316   current_space = free_space;
317 
318   for (i = 0; i < n; i++) {
319     /* copy previous fill into linked list */
320     nzi   = 0;
321     nnz   = ai[r[i] + 1] - ai[r[i]];
322     ajtmp = aj + ai[r[i]];
323     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
324     nzi += nlnk;
325 
326     /* add pivot rows into linked list */
327     row = lnk[n];
328     while (row < i) {
329       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
330       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
331       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
332       nzi += nlnk;
333       row = lnk[row];
334     }
335     bi[i + 1] = bi[i] + nzi;
336     im[i]     = nzi;
337 
338     /* mark bdiag */
339     nzbd = 0;
340     nnz  = nzi;
341     k    = lnk[n];
342     while (nnz-- && k < i) {
343       nzbd++;
344       k = lnk[k];
345     }
346     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
347 
348     /* if free space is not available, make more free space */
349     if (current_space->local_remaining < nzi) {
350       /* estimated additional space needed */
351       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
352       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
353       reallocs++;
354     }
355 
356     /* copy data into free space, then initialize lnk */
357     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
358 
359     bi_ptr[i] = current_space->array;
360     current_space->array += nzi;
361     current_space->local_used += nzi;
362     current_space->local_remaining -= nzi;
363   }
364 
365   PetscCall(ISRestoreIndices(isrow, &r));
366   PetscCall(ISRestoreIndices(isicol, &ic));
367 
368   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
369   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
370   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
371   PetscCall(PetscLLDestroy(lnk, lnkbt));
372   PetscCall(PetscFree2(bi_ptr, im));
373 
374   /* put together the new matrix */
375   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
376   b = (Mat_SeqAIJ *)(B)->data;
377 
378   b->free_a       = PETSC_TRUE;
379   b->free_ij      = PETSC_TRUE;
380   b->singlemalloc = PETSC_FALSE;
381 
382   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
383 
384   b->j    = bj;
385   b->i    = bi;
386   b->diag = bdiag;
387   b->ilen = NULL;
388   b->imax = NULL;
389   b->row  = isrow;
390   b->col  = iscol;
391   PetscCall(PetscObjectReference((PetscObject)isrow));
392   PetscCall(PetscObjectReference((PetscObject)iscol));
393   b->icol = isicol;
394   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
395 
396   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
397   b->maxnz = b->nz = bdiag[0] + 1;
398 
399   B->factortype            = MAT_FACTOR_LU;
400   B->info.factor_mallocs   = reallocs;
401   B->info.fill_ratio_given = f;
402 
403   if (ai[n]) {
404     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
405   } else {
406     B->info.fill_ratio_needed = 0.0;
407   }
408 #if defined(PETSC_USE_INFO)
409   if (ai[n] != 0) {
410     PetscReal af = B->info.fill_ratio_needed;
411     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
412     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
413     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
414     PetscCall(PetscInfo(A, "for best performance.\n"));
415   } else {
416     PetscCall(PetscInfo(A, "Empty matrix\n"));
417   }
418 #endif
419   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
420   if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
421   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
422   PetscFunctionReturn(0);
423 }
424 
425 /*
426     Trouble in factorization, should we dump the original matrix?
427 */
428 PetscErrorCode MatFactorDumpMatrix(Mat A) {
429   PetscBool flg = PETSC_FALSE;
430 
431   PetscFunctionBegin;
432   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
433   if (flg) {
434     PetscViewer viewer;
435     char        filename[PETSC_MAX_PATH_LEN];
436 
437     PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
438     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
439     PetscCall(MatView(A, viewer));
440     PetscCall(PetscViewerDestroy(&viewer));
441   }
442   PetscFunctionReturn(0);
443 }
444 
445 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) {
446   Mat              C = B;
447   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
448   IS               isrow = b->row, isicol = b->icol;
449   const PetscInt  *r, *ic, *ics;
450   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
451   PetscInt         i, j, k, nz, nzL, row, *pj;
452   const PetscInt  *ajtmp, *bjtmp;
453   MatScalar       *rtmp, *pc, multiplier, *pv;
454   const MatScalar *aa = a->a, *v;
455   PetscBool        row_identity, col_identity;
456   FactorShiftCtx   sctx;
457   const PetscInt  *ddiag;
458   PetscReal        rs;
459   MatScalar        d;
460 
461   PetscFunctionBegin;
462   /* MatPivotSetUp(): initialize shift context sctx */
463   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
464 
465   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
466     ddiag          = a->diag;
467     sctx.shift_top = info->zeropivot;
468     for (i = 0; i < n; i++) {
469       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
470       d  = (aa)[ddiag[i]];
471       rs = -PetscAbsScalar(d) - PetscRealPart(d);
472       v  = aa + ai[i];
473       nz = ai[i + 1] - ai[i];
474       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
475       if (rs > sctx.shift_top) sctx.shift_top = rs;
476     }
477     sctx.shift_top *= 1.1;
478     sctx.nshift_max = 5;
479     sctx.shift_lo   = 0.;
480     sctx.shift_hi   = 1.;
481   }
482 
483   PetscCall(ISGetIndices(isrow, &r));
484   PetscCall(ISGetIndices(isicol, &ic));
485   PetscCall(PetscMalloc1(n + 1, &rtmp));
486   ics = ic;
487 
488   do {
489     sctx.newshift = PETSC_FALSE;
490     for (i = 0; i < n; i++) {
491       /* zero rtmp */
492       /* L part */
493       nz    = bi[i + 1] - bi[i];
494       bjtmp = bj + bi[i];
495       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
496 
497       /* U part */
498       nz    = bdiag[i] - bdiag[i + 1];
499       bjtmp = bj + bdiag[i + 1] + 1;
500       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
501 
502       /* load in initial (unfactored row) */
503       nz    = ai[r[i] + 1] - ai[r[i]];
504       ajtmp = aj + ai[r[i]];
505       v     = aa + ai[r[i]];
506       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
507       /* ZeropivotApply() */
508       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
509 
510       /* elimination */
511       bjtmp = bj + bi[i];
512       row   = *bjtmp++;
513       nzL   = bi[i + 1] - bi[i];
514       for (k = 0; k < nzL; k++) {
515         pc = rtmp + row;
516         if (*pc != 0.0) {
517           pv         = b->a + bdiag[row];
518           multiplier = *pc * (*pv);
519           *pc        = multiplier;
520 
521           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
522           pv = b->a + bdiag[row + 1] + 1;
523           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
524 
525           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
526           PetscCall(PetscLogFlops(1 + 2.0 * nz));
527         }
528         row = *bjtmp++;
529       }
530 
531       /* finished row so stick it into b->a */
532       rs = 0.0;
533       /* L part */
534       pv = b->a + bi[i];
535       pj = b->j + bi[i];
536       nz = bi[i + 1] - bi[i];
537       for (j = 0; j < nz; j++) {
538         pv[j] = rtmp[pj[j]];
539         rs += PetscAbsScalar(pv[j]);
540       }
541 
542       /* U part */
543       pv = b->a + bdiag[i + 1] + 1;
544       pj = b->j + bdiag[i + 1] + 1;
545       nz = bdiag[i] - bdiag[i + 1] - 1;
546       for (j = 0; j < nz; j++) {
547         pv[j] = rtmp[pj[j]];
548         rs += PetscAbsScalar(pv[j]);
549       }
550 
551       sctx.rs = rs;
552       sctx.pv = rtmp[i];
553       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
554       if (sctx.newshift) break; /* break for-loop */
555       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
556 
557       /* Mark diagonal and invert diagonal for simpler triangular solves */
558       pv  = b->a + bdiag[i];
559       *pv = 1.0 / rtmp[i];
560 
561     } /* endof for (i=0; i<n; i++) { */
562 
563     /* MatPivotRefine() */
564     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
565       /*
566        * if no shift in this attempt & shifting & started shifting & can refine,
567        * then try lower shift
568        */
569       sctx.shift_hi       = sctx.shift_fraction;
570       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
571       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
572       sctx.newshift       = PETSC_TRUE;
573       sctx.nshift++;
574     }
575   } while (sctx.newshift);
576 
577   PetscCall(PetscFree(rtmp));
578   PetscCall(ISRestoreIndices(isicol, &ic));
579   PetscCall(ISRestoreIndices(isrow, &r));
580 
581   PetscCall(ISIdentity(isrow, &row_identity));
582   PetscCall(ISIdentity(isicol, &col_identity));
583   if (b->inode.size) {
584     C->ops->solve = MatSolve_SeqAIJ_Inode;
585   } else if (row_identity && col_identity) {
586     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
587   } else {
588     C->ops->solve = MatSolve_SeqAIJ;
589   }
590   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
591   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
592   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
593   C->ops->matsolve          = MatMatSolve_SeqAIJ;
594   C->assembled              = PETSC_TRUE;
595   C->preallocated           = PETSC_TRUE;
596 
597   PetscCall(PetscLogFlops(C->cmap->n));
598 
599   /* MatShiftView(A,info,&sctx) */
600   if (sctx.nshift) {
601     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
602       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
603     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
604       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
605     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
606       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
607     }
608   }
609   PetscFunctionReturn(0);
610 }
611 
612 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) {
613   Mat              C = B;
614   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
615   IS               isrow = b->row, isicol = b->icol;
616   const PetscInt  *r, *ic, *ics;
617   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
618   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
619   const PetscInt  *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
620   MatScalar       *pv, *rtmp, *pc, multiplier, d;
621   const MatScalar *v, *aa = a->a;
622   PetscReal        rs = 0.0;
623   FactorShiftCtx   sctx;
624   const PetscInt  *ddiag;
625   PetscBool        row_identity, col_identity;
626 
627   PetscFunctionBegin;
628   /* MatPivotSetUp(): initialize shift context sctx */
629   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
630 
631   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
632     ddiag          = a->diag;
633     sctx.shift_top = info->zeropivot;
634     for (i = 0; i < n; i++) {
635       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
636       d  = (aa)[ddiag[i]];
637       rs = -PetscAbsScalar(d) - PetscRealPart(d);
638       v  = aa + ai[i];
639       nz = ai[i + 1] - ai[i];
640       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
641       if (rs > sctx.shift_top) sctx.shift_top = rs;
642     }
643     sctx.shift_top *= 1.1;
644     sctx.nshift_max = 5;
645     sctx.shift_lo   = 0.;
646     sctx.shift_hi   = 1.;
647   }
648 
649   PetscCall(ISGetIndices(isrow, &r));
650   PetscCall(ISGetIndices(isicol, &ic));
651   PetscCall(PetscMalloc1(n + 1, &rtmp));
652   ics = ic;
653 
654   do {
655     sctx.newshift = PETSC_FALSE;
656     for (i = 0; i < n; i++) {
657       nz    = bi[i + 1] - bi[i];
658       bjtmp = bj + bi[i];
659       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
660 
661       /* load in initial (unfactored row) */
662       nz    = ai[r[i] + 1] - ai[r[i]];
663       ajtmp = aj + ai[r[i]];
664       v     = aa + ai[r[i]];
665       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
666       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
667 
668       row = *bjtmp++;
669       while (row < i) {
670         pc = rtmp + row;
671         if (*pc != 0.0) {
672           pv         = b->a + diag_offset[row];
673           pj         = b->j + diag_offset[row] + 1;
674           multiplier = *pc / *pv++;
675           *pc        = multiplier;
676           nz         = bi[row + 1] - diag_offset[row] - 1;
677           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
678           PetscCall(PetscLogFlops(1 + 2.0 * nz));
679         }
680         row = *bjtmp++;
681       }
682       /* finished row so stick it into b->a */
683       pv   = b->a + bi[i];
684       pj   = b->j + bi[i];
685       nz   = bi[i + 1] - bi[i];
686       diag = diag_offset[i] - bi[i];
687       rs   = 0.0;
688       for (j = 0; j < nz; j++) {
689         pv[j] = rtmp[pj[j]];
690         rs += PetscAbsScalar(pv[j]);
691       }
692       rs -= PetscAbsScalar(pv[diag]);
693 
694       sctx.rs = rs;
695       sctx.pv = pv[diag];
696       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
697       if (sctx.newshift) break;
698       pv[diag] = sctx.pv;
699     }
700 
701     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
702       /*
703        * if no shift in this attempt & shifting & started shifting & can refine,
704        * then try lower shift
705        */
706       sctx.shift_hi       = sctx.shift_fraction;
707       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
708       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
709       sctx.newshift       = PETSC_TRUE;
710       sctx.nshift++;
711     }
712   } while (sctx.newshift);
713 
714   /* invert diagonal entries for simpler triangular solves */
715   for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]];
716   PetscCall(PetscFree(rtmp));
717   PetscCall(ISRestoreIndices(isicol, &ic));
718   PetscCall(ISRestoreIndices(isrow, &r));
719 
720   PetscCall(ISIdentity(isrow, &row_identity));
721   PetscCall(ISIdentity(isicol, &col_identity));
722   if (row_identity && col_identity) {
723     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
724   } else {
725     C->ops->solve = MatSolve_SeqAIJ_inplace;
726   }
727   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
728   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
729   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
730   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
731 
732   C->assembled    = PETSC_TRUE;
733   C->preallocated = PETSC_TRUE;
734 
735   PetscCall(PetscLogFlops(C->cmap->n));
736   if (sctx.nshift) {
737     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
738       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
739     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
740       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
741     }
742   }
743   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
744   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
745 
746   PetscCall(MatSeqAIJCheckInode(C));
747   PetscFunctionReturn(0);
748 }
749 
750 /*
751    This routine implements inplace ILU(0) with row or/and column permutations.
752    Input:
753      A - original matrix
754    Output;
755      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
756          a->j (col index) is permuted by the inverse of colperm, then sorted
757          a->a reordered accordingly with a->j
758          a->diag (ptr to diagonal elements) is updated.
759 */
760 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info) {
761   Mat_SeqAIJ      *a     = (Mat_SeqAIJ *)A->data;
762   IS               isrow = a->row, isicol = a->icol;
763   const PetscInt  *r, *ic, *ics;
764   PetscInt         i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
765   PetscInt        *ajtmp, nz, row;
766   PetscInt        *diag = a->diag, nbdiag, *pj;
767   PetscScalar     *rtmp, *pc, multiplier, d;
768   MatScalar       *pv, *v;
769   PetscReal        rs;
770   FactorShiftCtx   sctx;
771   const MatScalar *aa = a->a, *vtmp;
772 
773   PetscFunctionBegin;
774   PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
775 
776   /* MatPivotSetUp(): initialize shift context sctx */
777   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
778 
779   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
780     const PetscInt *ddiag = a->diag;
781     sctx.shift_top        = info->zeropivot;
782     for (i = 0; i < n; i++) {
783       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
784       d    = (aa)[ddiag[i]];
785       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
786       vtmp = aa + ai[i];
787       nz   = ai[i + 1] - ai[i];
788       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
789       if (rs > sctx.shift_top) sctx.shift_top = rs;
790     }
791     sctx.shift_top *= 1.1;
792     sctx.nshift_max = 5;
793     sctx.shift_lo   = 0.;
794     sctx.shift_hi   = 1.;
795   }
796 
797   PetscCall(ISGetIndices(isrow, &r));
798   PetscCall(ISGetIndices(isicol, &ic));
799   PetscCall(PetscMalloc1(n + 1, &rtmp));
800   PetscCall(PetscArrayzero(rtmp, n + 1));
801   ics = ic;
802 
803 #if defined(MV)
804   sctx.shift_top      = 0.;
805   sctx.nshift_max     = 0;
806   sctx.shift_lo       = 0.;
807   sctx.shift_hi       = 0.;
808   sctx.shift_fraction = 0.;
809 
810   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
811     sctx.shift_top = 0.;
812     for (i = 0; i < n; i++) {
813       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
814       d  = (a->a)[diag[i]];
815       rs = -PetscAbsScalar(d) - PetscRealPart(d);
816       v  = a->a + ai[i];
817       nz = ai[i + 1] - ai[i];
818       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
819       if (rs > sctx.shift_top) sctx.shift_top = rs;
820     }
821     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
822     sctx.shift_top *= 1.1;
823     sctx.nshift_max = 5;
824     sctx.shift_lo   = 0.;
825     sctx.shift_hi   = 1.;
826   }
827 
828   sctx.shift_amount = 0.;
829   sctx.nshift       = 0;
830 #endif
831 
832   do {
833     sctx.newshift = PETSC_FALSE;
834     for (i = 0; i < n; i++) {
835       /* load in initial unfactored row */
836       nz    = ai[r[i] + 1] - ai[r[i]];
837       ajtmp = aj + ai[r[i]];
838       v     = a->a + ai[r[i]];
839       /* sort permuted ajtmp and values v accordingly */
840       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
841       PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
842 
843       diag[r[i]] = ai[r[i]];
844       for (j = 0; j < nz; j++) {
845         rtmp[ajtmp[j]] = v[j];
846         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
847       }
848       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
849 
850       row = *ajtmp++;
851       while (row < i) {
852         pc = rtmp + row;
853         if (*pc != 0.0) {
854           pv = a->a + diag[r[row]];
855           pj = aj + diag[r[row]] + 1;
856 
857           multiplier = *pc / *pv++;
858           *pc        = multiplier;
859           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
860           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
861           PetscCall(PetscLogFlops(1 + 2.0 * nz));
862         }
863         row = *ajtmp++;
864       }
865       /* finished row so overwrite it onto a->a */
866       pv     = a->a + ai[r[i]];
867       pj     = aj + ai[r[i]];
868       nz     = ai[r[i] + 1] - ai[r[i]];
869       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
870 
871       rs = 0.0;
872       for (j = 0; j < nz; j++) {
873         pv[j] = rtmp[pj[j]];
874         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
875       }
876 
877       sctx.rs = rs;
878       sctx.pv = pv[nbdiag];
879       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
880       if (sctx.newshift) break;
881       pv[nbdiag] = sctx.pv;
882     }
883 
884     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
885       /*
886        * if no shift in this attempt & shifting & started shifting & can refine,
887        * then try lower shift
888        */
889       sctx.shift_hi       = sctx.shift_fraction;
890       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
891       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
892       sctx.newshift       = PETSC_TRUE;
893       sctx.nshift++;
894     }
895   } while (sctx.newshift);
896 
897   /* invert diagonal entries for simpler triangular solves */
898   for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]];
899 
900   PetscCall(PetscFree(rtmp));
901   PetscCall(ISRestoreIndices(isicol, &ic));
902   PetscCall(ISRestoreIndices(isrow, &r));
903 
904   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
905   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
906   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
907   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
908 
909   A->assembled    = PETSC_TRUE;
910   A->preallocated = PETSC_TRUE;
911 
912   PetscCall(PetscLogFlops(A->cmap->n));
913   if (sctx.nshift) {
914     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
915       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
916     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
917       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
918     }
919   }
920   PetscFunctionReturn(0);
921 }
922 
923 /* ----------------------------------------------------------- */
924 PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info) {
925   Mat C;
926 
927   PetscFunctionBegin;
928   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
929   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
930   PetscCall(MatLUFactorNumeric(C, A, info));
931 
932   A->ops->solve          = C->ops->solve;
933   A->ops->solvetranspose = C->ops->solvetranspose;
934 
935   PetscCall(MatHeaderMerge(A, &C));
936   PetscFunctionReturn(0);
937 }
938 /* ----------------------------------------------------------- */
939 
940 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) {
941   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
942   IS                 iscol = a->col, isrow = a->row;
943   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
944   PetscInt           nz;
945   const PetscInt    *rout, *cout, *r, *c;
946   PetscScalar       *x, *tmp, *tmps, sum;
947   const PetscScalar *b;
948   const MatScalar   *aa = a->a, *v;
949 
950   PetscFunctionBegin;
951   if (!n) PetscFunctionReturn(0);
952 
953   PetscCall(VecGetArrayRead(bb, &b));
954   PetscCall(VecGetArrayWrite(xx, &x));
955   tmp = a->solve_work;
956 
957   PetscCall(ISGetIndices(isrow, &rout));
958   r = rout;
959   PetscCall(ISGetIndices(iscol, &cout));
960   c = cout + (n - 1);
961 
962   /* forward solve the lower triangular */
963   tmp[0] = b[*r++];
964   tmps   = tmp;
965   for (i = 1; i < n; i++) {
966     v   = aa + ai[i];
967     vi  = aj + ai[i];
968     nz  = a->diag[i] - ai[i];
969     sum = b[*r++];
970     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
971     tmp[i] = sum;
972   }
973 
974   /* backward solve the upper triangular */
975   for (i = n - 1; i >= 0; i--) {
976     v   = aa + a->diag[i] + 1;
977     vi  = aj + a->diag[i] + 1;
978     nz  = ai[i + 1] - a->diag[i] - 1;
979     sum = tmp[i];
980     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
981     x[*c--] = tmp[i] = sum * aa[a->diag[i]];
982   }
983 
984   PetscCall(ISRestoreIndices(isrow, &rout));
985   PetscCall(ISRestoreIndices(iscol, &cout));
986   PetscCall(VecRestoreArrayRead(bb, &b));
987   PetscCall(VecRestoreArrayWrite(xx, &x));
988   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
989   PetscFunctionReturn(0);
990 }
991 
992 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X) {
993   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
994   IS                 iscol = a->col, isrow = a->row;
995   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
996   PetscInt           nz, neq, ldb, ldx;
997   const PetscInt    *rout, *cout, *r, *c;
998   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
999   const PetscScalar *b, *aa  = a->a, *v;
1000   PetscBool          isdense;
1001 
1002   PetscFunctionBegin;
1003   if (!n) PetscFunctionReturn(0);
1004   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1005   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1006   if (X != B) {
1007     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1008     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1009   }
1010   PetscCall(MatDenseGetArrayRead(B, &b));
1011   PetscCall(MatDenseGetLDA(B, &ldb));
1012   PetscCall(MatDenseGetArray(X, &x));
1013   PetscCall(MatDenseGetLDA(X, &ldx));
1014   PetscCall(ISGetIndices(isrow, &rout));
1015   r = rout;
1016   PetscCall(ISGetIndices(iscol, &cout));
1017   c = cout;
1018   for (neq = 0; neq < B->cmap->n; neq++) {
1019     /* forward solve the lower triangular */
1020     tmp[0] = b[r[0]];
1021     tmps   = tmp;
1022     for (i = 1; i < n; i++) {
1023       v   = aa + ai[i];
1024       vi  = aj + ai[i];
1025       nz  = a->diag[i] - ai[i];
1026       sum = b[r[i]];
1027       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1028       tmp[i] = sum;
1029     }
1030     /* backward solve the upper triangular */
1031     for (i = n - 1; i >= 0; i--) {
1032       v   = aa + a->diag[i] + 1;
1033       vi  = aj + a->diag[i] + 1;
1034       nz  = ai[i + 1] - a->diag[i] - 1;
1035       sum = tmp[i];
1036       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1037       x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
1038     }
1039     b += ldb;
1040     x += ldx;
1041   }
1042   PetscCall(ISRestoreIndices(isrow, &rout));
1043   PetscCall(ISRestoreIndices(iscol, &cout));
1044   PetscCall(MatDenseRestoreArrayRead(B, &b));
1045   PetscCall(MatDenseRestoreArray(X, &x));
1046   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1047   PetscFunctionReturn(0);
1048 }
1049 
1050 PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X) {
1051   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1052   IS                 iscol = a->col, isrow = a->row;
1053   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1054   PetscInt           nz, neq, ldb, ldx;
1055   const PetscInt    *rout, *cout, *r, *c;
1056   PetscScalar       *x, *tmp = a->solve_work, sum;
1057   const PetscScalar *b, *aa  = a->a, *v;
1058   PetscBool          isdense;
1059 
1060   PetscFunctionBegin;
1061   if (!n) PetscFunctionReturn(0);
1062   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1063   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1064   if (X != B) {
1065     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1066     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1067   }
1068   PetscCall(MatDenseGetArrayRead(B, &b));
1069   PetscCall(MatDenseGetLDA(B, &ldb));
1070   PetscCall(MatDenseGetArray(X, &x));
1071   PetscCall(MatDenseGetLDA(X, &ldx));
1072   PetscCall(ISGetIndices(isrow, &rout));
1073   r = rout;
1074   PetscCall(ISGetIndices(iscol, &cout));
1075   c = cout;
1076   for (neq = 0; neq < B->cmap->n; neq++) {
1077     /* forward solve the lower triangular */
1078     tmp[0] = b[r[0]];
1079     v      = aa;
1080     vi     = aj;
1081     for (i = 1; i < n; i++) {
1082       nz  = ai[i + 1] - ai[i];
1083       sum = b[r[i]];
1084       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1085       tmp[i] = sum;
1086       v += nz;
1087       vi += nz;
1088     }
1089     /* backward solve the upper triangular */
1090     for (i = n - 1; i >= 0; i--) {
1091       v   = aa + adiag[i + 1] + 1;
1092       vi  = aj + adiag[i + 1] + 1;
1093       nz  = adiag[i] - adiag[i + 1] - 1;
1094       sum = tmp[i];
1095       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1096       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
1097     }
1098     b += ldb;
1099     x += ldx;
1100   }
1101   PetscCall(ISRestoreIndices(isrow, &rout));
1102   PetscCall(ISRestoreIndices(iscol, &cout));
1103   PetscCall(MatDenseRestoreArrayRead(B, &b));
1104   PetscCall(MatDenseRestoreArray(X, &x));
1105   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx) {
1110   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1111   IS                 iscol = a->col, isrow = a->row;
1112   const PetscInt    *r, *c, *rout, *cout;
1113   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1114   PetscInt           nz, row;
1115   PetscScalar       *x, *tmp, *tmps, sum;
1116   const PetscScalar *b;
1117   const MatScalar   *aa = a->a, *v;
1118 
1119   PetscFunctionBegin;
1120   if (!n) PetscFunctionReturn(0);
1121 
1122   PetscCall(VecGetArrayRead(bb, &b));
1123   PetscCall(VecGetArrayWrite(xx, &x));
1124   tmp = a->solve_work;
1125 
1126   PetscCall(ISGetIndices(isrow, &rout));
1127   r = rout;
1128   PetscCall(ISGetIndices(iscol, &cout));
1129   c = cout + (n - 1);
1130 
1131   /* forward solve the lower triangular */
1132   tmp[0] = b[*r++];
1133   tmps   = tmp;
1134   for (row = 1; row < n; row++) {
1135     i   = rout[row]; /* permuted row */
1136     v   = aa + ai[i];
1137     vi  = aj + ai[i];
1138     nz  = a->diag[i] - ai[i];
1139     sum = b[*r++];
1140     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1141     tmp[row] = sum;
1142   }
1143 
1144   /* backward solve the upper triangular */
1145   for (row = n - 1; row >= 0; row--) {
1146     i   = rout[row]; /* permuted row */
1147     v   = aa + a->diag[i] + 1;
1148     vi  = aj + a->diag[i] + 1;
1149     nz  = ai[i + 1] - a->diag[i] - 1;
1150     sum = tmp[row];
1151     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1152     x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1153   }
1154 
1155   PetscCall(ISRestoreIndices(isrow, &rout));
1156   PetscCall(ISRestoreIndices(iscol, &cout));
1157   PetscCall(VecRestoreArrayRead(bb, &b));
1158   PetscCall(VecRestoreArrayWrite(xx, &x));
1159   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1160   PetscFunctionReturn(0);
1161 }
1162 
1163 /* ----------------------------------------------------------- */
1164 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1165 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) {
1166   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1167   PetscInt           n  = A->rmap->n;
1168   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag;
1169   PetscScalar       *x;
1170   const PetscScalar *b;
1171   const MatScalar   *aa = a->a;
1172 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1173   PetscInt         adiag_i, i, nz, ai_i;
1174   const PetscInt  *vi;
1175   const MatScalar *v;
1176   PetscScalar      sum;
1177 #endif
1178 
1179   PetscFunctionBegin;
1180   if (!n) PetscFunctionReturn(0);
1181 
1182   PetscCall(VecGetArrayRead(bb, &b));
1183   PetscCall(VecGetArrayWrite(xx, &x));
1184 
1185 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1186   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1187 #else
1188   /* forward solve the lower triangular */
1189   x[0] = b[0];
1190   for (i = 1; i < n; i++) {
1191     ai_i = ai[i];
1192     v    = aa + ai_i;
1193     vi   = aj + ai_i;
1194     nz   = adiag[i] - ai_i;
1195     sum  = b[i];
1196     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1197     x[i] = sum;
1198   }
1199 
1200   /* backward solve the upper triangular */
1201   for (i = n - 1; i >= 0; i--) {
1202     adiag_i = adiag[i];
1203     v       = aa + adiag_i + 1;
1204     vi      = aj + adiag_i + 1;
1205     nz      = ai[i + 1] - adiag_i - 1;
1206     sum     = x[i];
1207     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1208     x[i] = sum * aa[adiag_i];
1209   }
1210 #endif
1211   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1212   PetscCall(VecRestoreArrayRead(bb, &b));
1213   PetscCall(VecRestoreArrayWrite(xx, &x));
1214   PetscFunctionReturn(0);
1215 }
1216 
1217 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx) {
1218   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1219   IS                 iscol = a->col, isrow = a->row;
1220   PetscInt           i, n                  = A->rmap->n, j;
1221   PetscInt           nz;
1222   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
1223   PetscScalar       *x, *tmp, sum;
1224   const PetscScalar *b;
1225   const MatScalar   *aa = a->a, *v;
1226 
1227   PetscFunctionBegin;
1228   if (yy != xx) PetscCall(VecCopy(yy, xx));
1229 
1230   PetscCall(VecGetArrayRead(bb, &b));
1231   PetscCall(VecGetArray(xx, &x));
1232   tmp = a->solve_work;
1233 
1234   PetscCall(ISGetIndices(isrow, &rout));
1235   r = rout;
1236   PetscCall(ISGetIndices(iscol, &cout));
1237   c = cout + (n - 1);
1238 
1239   /* forward solve the lower triangular */
1240   tmp[0] = b[*r++];
1241   for (i = 1; i < n; i++) {
1242     v   = aa + ai[i];
1243     vi  = aj + ai[i];
1244     nz  = a->diag[i] - ai[i];
1245     sum = b[*r++];
1246     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1247     tmp[i] = sum;
1248   }
1249 
1250   /* backward solve the upper triangular */
1251   for (i = n - 1; i >= 0; i--) {
1252     v   = aa + a->diag[i] + 1;
1253     vi  = aj + a->diag[i] + 1;
1254     nz  = ai[i + 1] - a->diag[i] - 1;
1255     sum = tmp[i];
1256     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1257     tmp[i] = sum * aa[a->diag[i]];
1258     x[*c--] += tmp[i];
1259   }
1260 
1261   PetscCall(ISRestoreIndices(isrow, &rout));
1262   PetscCall(ISRestoreIndices(iscol, &cout));
1263   PetscCall(VecRestoreArrayRead(bb, &b));
1264   PetscCall(VecRestoreArray(xx, &x));
1265   PetscCall(PetscLogFlops(2.0 * a->nz));
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx) {
1270   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1271   IS                 iscol = a->col, isrow = a->row;
1272   PetscInt           i, n                  = A->rmap->n, j;
1273   PetscInt           nz;
1274   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1275   PetscScalar       *x, *tmp, sum;
1276   const PetscScalar *b;
1277   const MatScalar   *aa = a->a, *v;
1278 
1279   PetscFunctionBegin;
1280   if (yy != xx) PetscCall(VecCopy(yy, xx));
1281 
1282   PetscCall(VecGetArrayRead(bb, &b));
1283   PetscCall(VecGetArray(xx, &x));
1284   tmp = a->solve_work;
1285 
1286   PetscCall(ISGetIndices(isrow, &rout));
1287   r = rout;
1288   PetscCall(ISGetIndices(iscol, &cout));
1289   c = cout;
1290 
1291   /* forward solve the lower triangular */
1292   tmp[0] = b[r[0]];
1293   v      = aa;
1294   vi     = aj;
1295   for (i = 1; i < n; i++) {
1296     nz  = ai[i + 1] - ai[i];
1297     sum = b[r[i]];
1298     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1299     tmp[i] = sum;
1300     v += nz;
1301     vi += nz;
1302   }
1303 
1304   /* backward solve the upper triangular */
1305   v  = aa + adiag[n - 1];
1306   vi = aj + adiag[n - 1];
1307   for (i = n - 1; i >= 0; i--) {
1308     nz  = adiag[i] - adiag[i + 1] - 1;
1309     sum = tmp[i];
1310     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1311     tmp[i] = sum * v[nz];
1312     x[c[i]] += tmp[i];
1313     v += nz + 1;
1314     vi += nz + 1;
1315   }
1316 
1317   PetscCall(ISRestoreIndices(isrow, &rout));
1318   PetscCall(ISRestoreIndices(iscol, &cout));
1319   PetscCall(VecRestoreArrayRead(bb, &b));
1320   PetscCall(VecRestoreArray(xx, &x));
1321   PetscCall(PetscLogFlops(2.0 * a->nz));
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) {
1326   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1327   IS                 iscol = a->col, isrow = a->row;
1328   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1329   PetscInt           i, n = A->rmap->n, j;
1330   PetscInt           nz;
1331   PetscScalar       *x, *tmp, s1;
1332   const MatScalar   *aa = a->a, *v;
1333   const PetscScalar *b;
1334 
1335   PetscFunctionBegin;
1336   PetscCall(VecGetArrayRead(bb, &b));
1337   PetscCall(VecGetArrayWrite(xx, &x));
1338   tmp = a->solve_work;
1339 
1340   PetscCall(ISGetIndices(isrow, &rout));
1341   r = rout;
1342   PetscCall(ISGetIndices(iscol, &cout));
1343   c = cout;
1344 
1345   /* copy the b into temp work space according to permutation */
1346   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1347 
1348   /* forward solve the U^T */
1349   for (i = 0; i < n; i++) {
1350     v  = aa + diag[i];
1351     vi = aj + diag[i] + 1;
1352     nz = ai[i + 1] - diag[i] - 1;
1353     s1 = tmp[i];
1354     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1355     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1356     tmp[i] = s1;
1357   }
1358 
1359   /* backward solve the L^T */
1360   for (i = n - 1; i >= 0; i--) {
1361     v  = aa + diag[i] - 1;
1362     vi = aj + diag[i] - 1;
1363     nz = diag[i] - ai[i];
1364     s1 = tmp[i];
1365     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1366   }
1367 
1368   /* copy tmp into x according to permutation */
1369   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1370 
1371   PetscCall(ISRestoreIndices(isrow, &rout));
1372   PetscCall(ISRestoreIndices(iscol, &cout));
1373   PetscCall(VecRestoreArrayRead(bb, &b));
1374   PetscCall(VecRestoreArrayWrite(xx, &x));
1375 
1376   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx) {
1381   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1382   IS                 iscol = a->col, isrow = a->row;
1383   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1384   PetscInt           i, n = A->rmap->n, j;
1385   PetscInt           nz;
1386   PetscScalar       *x, *tmp, s1;
1387   const MatScalar   *aa = a->a, *v;
1388   const PetscScalar *b;
1389 
1390   PetscFunctionBegin;
1391   PetscCall(VecGetArrayRead(bb, &b));
1392   PetscCall(VecGetArrayWrite(xx, &x));
1393   tmp = a->solve_work;
1394 
1395   PetscCall(ISGetIndices(isrow, &rout));
1396   r = rout;
1397   PetscCall(ISGetIndices(iscol, &cout));
1398   c = cout;
1399 
1400   /* copy the b into temp work space according to permutation */
1401   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1402 
1403   /* forward solve the U^T */
1404   for (i = 0; i < n; i++) {
1405     v  = aa + adiag[i + 1] + 1;
1406     vi = aj + adiag[i + 1] + 1;
1407     nz = adiag[i] - adiag[i + 1] - 1;
1408     s1 = tmp[i];
1409     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1410     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1411     tmp[i] = s1;
1412   }
1413 
1414   /* backward solve the L^T */
1415   for (i = n - 1; i >= 0; i--) {
1416     v  = aa + ai[i];
1417     vi = aj + ai[i];
1418     nz = ai[i + 1] - ai[i];
1419     s1 = tmp[i];
1420     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1421   }
1422 
1423   /* copy tmp into x according to permutation */
1424   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1425 
1426   PetscCall(ISRestoreIndices(isrow, &rout));
1427   PetscCall(ISRestoreIndices(iscol, &cout));
1428   PetscCall(VecRestoreArrayRead(bb, &b));
1429   PetscCall(VecRestoreArrayWrite(xx, &x));
1430 
1431   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1432   PetscFunctionReturn(0);
1433 }
1434 
1435 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx) {
1436   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1437   IS                 iscol = a->col, isrow = a->row;
1438   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1439   PetscInt           i, n = A->rmap->n, j;
1440   PetscInt           nz;
1441   PetscScalar       *x, *tmp, s1;
1442   const MatScalar   *aa = a->a, *v;
1443   const PetscScalar *b;
1444 
1445   PetscFunctionBegin;
1446   if (zz != xx) PetscCall(VecCopy(zz, xx));
1447   PetscCall(VecGetArrayRead(bb, &b));
1448   PetscCall(VecGetArray(xx, &x));
1449   tmp = a->solve_work;
1450 
1451   PetscCall(ISGetIndices(isrow, &rout));
1452   r = rout;
1453   PetscCall(ISGetIndices(iscol, &cout));
1454   c = cout;
1455 
1456   /* copy the b into temp work space according to permutation */
1457   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1458 
1459   /* forward solve the U^T */
1460   for (i = 0; i < n; i++) {
1461     v  = aa + diag[i];
1462     vi = aj + diag[i] + 1;
1463     nz = ai[i + 1] - diag[i] - 1;
1464     s1 = tmp[i];
1465     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1466     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1467     tmp[i] = s1;
1468   }
1469 
1470   /* backward solve the L^T */
1471   for (i = n - 1; i >= 0; i--) {
1472     v  = aa + diag[i] - 1;
1473     vi = aj + diag[i] - 1;
1474     nz = diag[i] - ai[i];
1475     s1 = tmp[i];
1476     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1477   }
1478 
1479   /* copy tmp into x according to permutation */
1480   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1481 
1482   PetscCall(ISRestoreIndices(isrow, &rout));
1483   PetscCall(ISRestoreIndices(iscol, &cout));
1484   PetscCall(VecRestoreArrayRead(bb, &b));
1485   PetscCall(VecRestoreArray(xx, &x));
1486 
1487   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1488   PetscFunctionReturn(0);
1489 }
1490 
1491 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx) {
1492   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1493   IS                 iscol = a->col, isrow = a->row;
1494   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1495   PetscInt           i, n = A->rmap->n, j;
1496   PetscInt           nz;
1497   PetscScalar       *x, *tmp, s1;
1498   const MatScalar   *aa = a->a, *v;
1499   const PetscScalar *b;
1500 
1501   PetscFunctionBegin;
1502   if (zz != xx) PetscCall(VecCopy(zz, xx));
1503   PetscCall(VecGetArrayRead(bb, &b));
1504   PetscCall(VecGetArray(xx, &x));
1505   tmp = a->solve_work;
1506 
1507   PetscCall(ISGetIndices(isrow, &rout));
1508   r = rout;
1509   PetscCall(ISGetIndices(iscol, &cout));
1510   c = cout;
1511 
1512   /* copy the b into temp work space according to permutation */
1513   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1514 
1515   /* forward solve the U^T */
1516   for (i = 0; i < n; i++) {
1517     v  = aa + adiag[i + 1] + 1;
1518     vi = aj + adiag[i + 1] + 1;
1519     nz = adiag[i] - adiag[i + 1] - 1;
1520     s1 = tmp[i];
1521     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1522     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1523     tmp[i] = s1;
1524   }
1525 
1526   /* backward solve the L^T */
1527   for (i = n - 1; i >= 0; i--) {
1528     v  = aa + ai[i];
1529     vi = aj + ai[i];
1530     nz = ai[i + 1] - ai[i];
1531     s1 = tmp[i];
1532     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1533   }
1534 
1535   /* copy tmp into x according to permutation */
1536   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1537 
1538   PetscCall(ISRestoreIndices(isrow, &rout));
1539   PetscCall(ISRestoreIndices(iscol, &cout));
1540   PetscCall(VecRestoreArrayRead(bb, &b));
1541   PetscCall(VecRestoreArray(xx, &x));
1542 
1543   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1544   PetscFunctionReturn(0);
1545 }
1546 
1547 /* ----------------------------------------------------------------*/
1548 
1549 /*
1550    ilu() under revised new data structure.
1551    Factored arrays bj and ba are stored as
1552      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1553 
1554    bi=fact->i is an array of size n+1, in which
1555      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1556      bi[n]:  points to L(n-1,n-1)+1
1557 
1558   bdiag=fact->diag is an array of size n+1,in which
1559      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1560      bdiag[n]: points to entry of U(n-1,0)-1
1561 
1562    U(i,:) contains bdiag[i] as its last entry, i.e.,
1563     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1564 */
1565 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) {
1566   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1567   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1568   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
1569   IS             isicol;
1570 
1571   PetscFunctionBegin;
1572   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1573   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1574   b = (Mat_SeqAIJ *)(fact)->data;
1575 
1576   /* allocate matrix arrays for new data structure */
1577   PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i));
1578 
1579   b->singlemalloc = PETSC_TRUE;
1580   if (!b->diag) { PetscCall(PetscMalloc1(n + 1, &b->diag)); }
1581   bdiag = b->diag;
1582 
1583   if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1584 
1585   /* set bi and bj with new data structure */
1586   bi = b->i;
1587   bj = b->j;
1588 
1589   /* L part */
1590   bi[0] = 0;
1591   for (i = 0; i < n; i++) {
1592     nz        = adiag[i] - ai[i];
1593     bi[i + 1] = bi[i] + nz;
1594     aj        = a->j + ai[i];
1595     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1596   }
1597 
1598   /* U part */
1599   bdiag[n] = bi[n] - 1;
1600   for (i = n - 1; i >= 0; i--) {
1601     nz = ai[i + 1] - adiag[i] - 1;
1602     aj = a->j + adiag[i] + 1;
1603     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1604     /* diag[i] */
1605     bj[k++]  = i;
1606     bdiag[i] = bdiag[i + 1] + nz + 1;
1607   }
1608 
1609   fact->factortype             = MAT_FACTOR_ILU;
1610   fact->info.factor_mallocs    = 0;
1611   fact->info.fill_ratio_given  = info->fill;
1612   fact->info.fill_ratio_needed = 1.0;
1613   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1614   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1615 
1616   b       = (Mat_SeqAIJ *)(fact)->data;
1617   b->row  = isrow;
1618   b->col  = iscol;
1619   b->icol = isicol;
1620   PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work));
1621   PetscCall(PetscObjectReference((PetscObject)isrow));
1622   PetscCall(PetscObjectReference((PetscObject)iscol));
1623   PetscFunctionReturn(0);
1624 }
1625 
1626 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) {
1627   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1628   IS                 isicol;
1629   const PetscInt    *r, *ic;
1630   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1631   PetscInt          *bi, *cols, nnz, *cols_lvl;
1632   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1633   PetscInt           i, levels, diagonal_fill;
1634   PetscBool          col_identity, row_identity, missing;
1635   PetscReal          f;
1636   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1637   PetscBT            lnkbt;
1638   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1639   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1640   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1641 
1642   PetscFunctionBegin;
1643   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1644   PetscCall(MatMissingDiagonal(A, &missing, &i));
1645   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1646 
1647   levels = (PetscInt)info->levels;
1648   PetscCall(ISIdentity(isrow, &row_identity));
1649   PetscCall(ISIdentity(iscol, &col_identity));
1650   if (!levels && row_identity && col_identity) {
1651     /* special case: ilu(0) with natural ordering */
1652     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1653     if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1654     PetscFunctionReturn(0);
1655   }
1656 
1657   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1658   PetscCall(ISGetIndices(isrow, &r));
1659   PetscCall(ISGetIndices(isicol, &ic));
1660 
1661   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1662   PetscCall(PetscMalloc1(n + 1, &bi));
1663   PetscCall(PetscMalloc1(n + 1, &bdiag));
1664   bi[0] = bdiag[0] = 0;
1665   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1666 
1667   /* create a linked list for storing column indices of the active row */
1668   nlnk = n + 1;
1669   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1670 
1671   /* initial FreeSpace size is f*(ai[n]+1) */
1672   f             = info->fill;
1673   diagonal_fill = (PetscInt)info->diagonal_fill;
1674   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1675   current_space = free_space;
1676   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1677   current_space_lvl = free_space_lvl;
1678   for (i = 0; i < n; i++) {
1679     nzi = 0;
1680     /* copy current row into linked list */
1681     nnz = ai[r[i] + 1] - ai[r[i]];
1682     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1683     cols   = aj + ai[r[i]];
1684     lnk[i] = -1; /* marker to indicate if diagonal exists */
1685     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1686     nzi += nlnk;
1687 
1688     /* make sure diagonal entry is included */
1689     if (diagonal_fill && lnk[i] == -1) {
1690       fm = n;
1691       while (lnk[fm] < i) fm = lnk[fm];
1692       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1693       lnk[fm]    = i;
1694       lnk_lvl[i] = 0;
1695       nzi++;
1696       dcount++;
1697     }
1698 
1699     /* add pivot rows into the active row */
1700     nzbd = 0;
1701     prow = lnk[n];
1702     while (prow < i) {
1703       nnz      = bdiag[prow];
1704       cols     = bj_ptr[prow] + nnz + 1;
1705       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1706       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1707       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1708       nzi += nlnk;
1709       prow = lnk[prow];
1710       nzbd++;
1711     }
1712     bdiag[i]  = nzbd;
1713     bi[i + 1] = bi[i] + nzi;
1714     /* if free space is not available, make more free space */
1715     if (current_space->local_remaining < nzi) {
1716       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1717       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1718       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1719       reallocs++;
1720     }
1721 
1722     /* copy data into free_space and free_space_lvl, then initialize lnk */
1723     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1724     bj_ptr[i]    = current_space->array;
1725     bjlvl_ptr[i] = current_space_lvl->array;
1726 
1727     /* make sure the active row i has diagonal entry */
1728     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1729 
1730     current_space->array += nzi;
1731     current_space->local_used += nzi;
1732     current_space->local_remaining -= nzi;
1733     current_space_lvl->array += nzi;
1734     current_space_lvl->local_used += nzi;
1735     current_space_lvl->local_remaining -= nzi;
1736   }
1737 
1738   PetscCall(ISRestoreIndices(isrow, &r));
1739   PetscCall(ISRestoreIndices(isicol, &ic));
1740   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1741   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
1742   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1743 
1744   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1745   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1746   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1747 
1748 #if defined(PETSC_USE_INFO)
1749   {
1750     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1751     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1752     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1753     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1754     PetscCall(PetscInfo(A, "for best performance.\n"));
1755     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1756   }
1757 #endif
1758   /* put together the new matrix */
1759   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1760   b = (Mat_SeqAIJ *)(fact)->data;
1761 
1762   b->free_a       = PETSC_TRUE;
1763   b->free_ij      = PETSC_TRUE;
1764   b->singlemalloc = PETSC_FALSE;
1765 
1766   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
1767 
1768   b->j    = bj;
1769   b->i    = bi;
1770   b->diag = bdiag;
1771   b->ilen = NULL;
1772   b->imax = NULL;
1773   b->row  = isrow;
1774   b->col  = iscol;
1775   PetscCall(PetscObjectReference((PetscObject)isrow));
1776   PetscCall(PetscObjectReference((PetscObject)iscol));
1777   b->icol = isicol;
1778 
1779   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1780   /* In b structure:  Free imax, ilen, old a, old j.
1781      Allocate bdiag, solve_work, new a, new j */
1782   b->maxnz = b->nz = bdiag[0] + 1;
1783 
1784   (fact)->info.factor_mallocs    = reallocs;
1785   (fact)->info.fill_ratio_given  = f;
1786   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1787   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1788   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1789   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1790   PetscFunctionReturn(0);
1791 }
1792 
1793 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) {
1794   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1795   IS                 isicol;
1796   const PetscInt    *r, *ic;
1797   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1798   PetscInt          *bi, *cols, nnz, *cols_lvl;
1799   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1800   PetscInt           i, levels, diagonal_fill;
1801   PetscBool          col_identity, row_identity;
1802   PetscReal          f;
1803   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1804   PetscBT            lnkbt;
1805   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1806   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1807   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1808   PetscBool          missing;
1809 
1810   PetscFunctionBegin;
1811   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1812   PetscCall(MatMissingDiagonal(A, &missing, &i));
1813   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1814 
1815   f             = info->fill;
1816   levels        = (PetscInt)info->levels;
1817   diagonal_fill = (PetscInt)info->diagonal_fill;
1818 
1819   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1820 
1821   PetscCall(ISIdentity(isrow, &row_identity));
1822   PetscCall(ISIdentity(iscol, &col_identity));
1823   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1824     PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE));
1825 
1826     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1827     if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1828     fact->factortype               = MAT_FACTOR_ILU;
1829     (fact)->info.factor_mallocs    = 0;
1830     (fact)->info.fill_ratio_given  = info->fill;
1831     (fact)->info.fill_ratio_needed = 1.0;
1832 
1833     b       = (Mat_SeqAIJ *)(fact)->data;
1834     b->row  = isrow;
1835     b->col  = iscol;
1836     b->icol = isicol;
1837     PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work));
1838     PetscCall(PetscObjectReference((PetscObject)isrow));
1839     PetscCall(PetscObjectReference((PetscObject)iscol));
1840     PetscFunctionReturn(0);
1841   }
1842 
1843   PetscCall(ISGetIndices(isrow, &r));
1844   PetscCall(ISGetIndices(isicol, &ic));
1845 
1846   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1847   PetscCall(PetscMalloc1(n + 1, &bi));
1848   PetscCall(PetscMalloc1(n + 1, &bdiag));
1849   bi[0] = bdiag[0] = 0;
1850 
1851   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1852 
1853   /* create a linked list for storing column indices of the active row */
1854   nlnk = n + 1;
1855   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1856 
1857   /* initial FreeSpace size is f*(ai[n]+1) */
1858   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1859   current_space = free_space;
1860   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1861   current_space_lvl = free_space_lvl;
1862 
1863   for (i = 0; i < n; i++) {
1864     nzi = 0;
1865     /* copy current row into linked list */
1866     nnz = ai[r[i] + 1] - ai[r[i]];
1867     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1868     cols   = aj + ai[r[i]];
1869     lnk[i] = -1; /* marker to indicate if diagonal exists */
1870     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1871     nzi += nlnk;
1872 
1873     /* make sure diagonal entry is included */
1874     if (diagonal_fill && lnk[i] == -1) {
1875       fm = n;
1876       while (lnk[fm] < i) fm = lnk[fm];
1877       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1878       lnk[fm]    = i;
1879       lnk_lvl[i] = 0;
1880       nzi++;
1881       dcount++;
1882     }
1883 
1884     /* add pivot rows into the active row */
1885     nzbd = 0;
1886     prow = lnk[n];
1887     while (prow < i) {
1888       nnz      = bdiag[prow];
1889       cols     = bj_ptr[prow] + nnz + 1;
1890       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1891       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1892       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1893       nzi += nlnk;
1894       prow = lnk[prow];
1895       nzbd++;
1896     }
1897     bdiag[i]  = nzbd;
1898     bi[i + 1] = bi[i] + nzi;
1899 
1900     /* if free space is not available, make more free space */
1901     if (current_space->local_remaining < nzi) {
1902       nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
1903       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1904       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1905       reallocs++;
1906     }
1907 
1908     /* copy data into free_space and free_space_lvl, then initialize lnk */
1909     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1910     bj_ptr[i]    = current_space->array;
1911     bjlvl_ptr[i] = current_space_lvl->array;
1912 
1913     /* make sure the active row i has diagonal entry */
1914     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1915 
1916     current_space->array += nzi;
1917     current_space->local_used += nzi;
1918     current_space->local_remaining -= nzi;
1919     current_space_lvl->array += nzi;
1920     current_space_lvl->local_used += nzi;
1921     current_space_lvl->local_remaining -= nzi;
1922   }
1923 
1924   PetscCall(ISRestoreIndices(isrow, &r));
1925   PetscCall(ISRestoreIndices(isicol, &ic));
1926 
1927   /* destroy list of free space and other temporary arrays */
1928   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
1929   PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */
1930   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1931   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1932   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1933 
1934 #if defined(PETSC_USE_INFO)
1935   {
1936     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
1937     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1938     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1939     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1940     PetscCall(PetscInfo(A, "for best performance.\n"));
1941     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1942   }
1943 #endif
1944 
1945   /* put together the new matrix */
1946   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1947   b = (Mat_SeqAIJ *)(fact)->data;
1948 
1949   b->free_a       = PETSC_TRUE;
1950   b->free_ij      = PETSC_TRUE;
1951   b->singlemalloc = PETSC_FALSE;
1952 
1953   PetscCall(PetscMalloc1(bi[n], &b->a));
1954   b->j = bj;
1955   b->i = bi;
1956   for (i = 0; i < n; i++) bdiag[i] += bi[i];
1957   b->diag = bdiag;
1958   b->ilen = NULL;
1959   b->imax = NULL;
1960   b->row  = isrow;
1961   b->col  = iscol;
1962   PetscCall(PetscObjectReference((PetscObject)isrow));
1963   PetscCall(PetscObjectReference((PetscObject)iscol));
1964   b->icol = isicol;
1965   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1966   /* In b structure:  Free imax, ilen, old a, old j.
1967      Allocate bdiag, solve_work, new a, new j */
1968   b->maxnz = b->nz = bi[n];
1969 
1970   (fact)->info.factor_mallocs    = reallocs;
1971   (fact)->info.fill_ratio_given  = f;
1972   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
1973   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ_inplace;
1974   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1975   PetscFunctionReturn(0);
1976 }
1977 
1978 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) {
1979   Mat             C  = B;
1980   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
1981   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
1982   IS              ip = b->row, iip = b->icol;
1983   const PetscInt *rip, *riip;
1984   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1985   PetscInt       *ai = a->i, *aj = a->j;
1986   PetscInt        k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1987   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
1988   PetscBool       perm_identity;
1989   FactorShiftCtx  sctx;
1990   PetscReal       rs;
1991   MatScalar       d, *v;
1992 
1993   PetscFunctionBegin;
1994   /* MatPivotSetUp(): initialize shift context sctx */
1995   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1996 
1997   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1998     sctx.shift_top = info->zeropivot;
1999     for (i = 0; i < mbs; i++) {
2000       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2001       d  = (aa)[a->diag[i]];
2002       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2003       v  = aa + ai[i];
2004       nz = ai[i + 1] - ai[i];
2005       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2006       if (rs > sctx.shift_top) sctx.shift_top = rs;
2007     }
2008     sctx.shift_top *= 1.1;
2009     sctx.nshift_max = 5;
2010     sctx.shift_lo   = 0.;
2011     sctx.shift_hi   = 1.;
2012   }
2013 
2014   PetscCall(ISGetIndices(ip, &rip));
2015   PetscCall(ISGetIndices(iip, &riip));
2016 
2017   /* allocate working arrays
2018      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2019      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2020   */
2021   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
2022 
2023   do {
2024     sctx.newshift = PETSC_FALSE;
2025 
2026     for (i = 0; i < mbs; i++) c2r[i] = mbs;
2027     if (mbs) il[0] = 0;
2028 
2029     for (k = 0; k < mbs; k++) {
2030       /* zero rtmp */
2031       nz    = bi[k + 1] - bi[k];
2032       bjtmp = bj + bi[k];
2033       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2034 
2035       /* load in initial unfactored row */
2036       bval = ba + bi[k];
2037       jmin = ai[rip[k]];
2038       jmax = ai[rip[k] + 1];
2039       for (j = jmin; j < jmax; j++) {
2040         col = riip[aj[j]];
2041         if (col >= k) { /* only take upper triangular entry */
2042           rtmp[col] = aa[j];
2043           *bval++   = 0.0; /* for in-place factorization */
2044         }
2045       }
2046       /* shift the diagonal of the matrix: ZeropivotApply() */
2047       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2048 
2049       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2050       dk = rtmp[k];
2051       i  = c2r[k]; /* first row to be added to k_th row  */
2052 
2053       while (i < k) {
2054         nexti = c2r[i]; /* next row to be added to k_th row */
2055 
2056         /* compute multiplier, update diag(k) and U(i,k) */
2057         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
2058         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
2059         dk += uikdi * ba[ili];           /* update diag[k] */
2060         ba[ili] = uikdi;                 /* -U(i,k) */
2061 
2062         /* add multiple of row i to k-th row */
2063         jmin = ili + 1;
2064         jmax = bi[i + 1];
2065         if (jmin < jmax) {
2066           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2067           /* update il and c2r for row i */
2068           il[i]  = jmin;
2069           j      = bj[jmin];
2070           c2r[i] = c2r[j];
2071           c2r[j] = i;
2072         }
2073         i = nexti;
2074       }
2075 
2076       /* copy data into U(k,:) */
2077       rs   = 0.0;
2078       jmin = bi[k];
2079       jmax = bi[k + 1] - 1;
2080       if (jmin < jmax) {
2081         for (j = jmin; j < jmax; j++) {
2082           col   = bj[j];
2083           ba[j] = rtmp[col];
2084           rs += PetscAbsScalar(ba[j]);
2085         }
2086         /* add the k-th row into il and c2r */
2087         il[k]  = jmin;
2088         i      = bj[jmin];
2089         c2r[k] = c2r[i];
2090         c2r[i] = k;
2091       }
2092 
2093       /* MatPivotCheck() */
2094       sctx.rs = rs;
2095       sctx.pv = dk;
2096       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
2097       if (sctx.newshift) break;
2098       dk = sctx.pv;
2099 
2100       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
2101     }
2102   } while (sctx.newshift);
2103 
2104   PetscCall(PetscFree3(rtmp, il, c2r));
2105   PetscCall(ISRestoreIndices(ip, &rip));
2106   PetscCall(ISRestoreIndices(iip, &riip));
2107 
2108   PetscCall(ISIdentity(ip, &perm_identity));
2109   if (perm_identity) {
2110     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2111     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2112     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2113     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2114   } else {
2115     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2116     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2117     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2118     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2119   }
2120 
2121   C->assembled    = PETSC_TRUE;
2122   C->preallocated = PETSC_TRUE;
2123 
2124   PetscCall(PetscLogFlops(C->rmap->n));
2125 
2126   /* MatPivotView() */
2127   if (sctx.nshift) {
2128     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2129       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
2130     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2131       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2132     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2133       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
2134     }
2135   }
2136   PetscFunctionReturn(0);
2137 }
2138 
2139 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) {
2140   Mat             C  = B;
2141   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
2142   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
2143   IS              ip = b->row, iip = b->icol;
2144   const PetscInt *rip, *riip;
2145   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
2146   PetscInt       *ai = a->i, *aj = a->j;
2147   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2148   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2149   PetscBool       perm_identity;
2150   FactorShiftCtx  sctx;
2151   PetscReal       rs;
2152   MatScalar       d, *v;
2153 
2154   PetscFunctionBegin;
2155   /* MatPivotSetUp(): initialize shift context sctx */
2156   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2157 
2158   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2159     sctx.shift_top = info->zeropivot;
2160     for (i = 0; i < mbs; i++) {
2161       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2162       d  = (aa)[a->diag[i]];
2163       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2164       v  = aa + ai[i];
2165       nz = ai[i + 1] - ai[i];
2166       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2167       if (rs > sctx.shift_top) sctx.shift_top = rs;
2168     }
2169     sctx.shift_top *= 1.1;
2170     sctx.nshift_max = 5;
2171     sctx.shift_lo   = 0.;
2172     sctx.shift_hi   = 1.;
2173   }
2174 
2175   PetscCall(ISGetIndices(ip, &rip));
2176   PetscCall(ISGetIndices(iip, &riip));
2177 
2178   /* initialization */
2179   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
2180 
2181   do {
2182     sctx.newshift = PETSC_FALSE;
2183 
2184     for (i = 0; i < mbs; i++) jl[i] = mbs;
2185     il[0] = 0;
2186 
2187     for (k = 0; k < mbs; k++) {
2188       /* zero rtmp */
2189       nz    = bi[k + 1] - bi[k];
2190       bjtmp = bj + bi[k];
2191       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2192 
2193       bval = ba + bi[k];
2194       /* initialize k-th row by the perm[k]-th row of A */
2195       jmin = ai[rip[k]];
2196       jmax = ai[rip[k] + 1];
2197       for (j = jmin; j < jmax; j++) {
2198         col = riip[aj[j]];
2199         if (col >= k) { /* only take upper triangular entry */
2200           rtmp[col] = aa[j];
2201           *bval++   = 0.0; /* for in-place factorization */
2202         }
2203       }
2204       /* shift the diagonal of the matrix */
2205       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2206 
2207       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2208       dk = rtmp[k];
2209       i  = jl[k]; /* first row to be added to k_th row  */
2210 
2211       while (i < k) {
2212         nexti = jl[i]; /* next row to be added to k_th row */
2213 
2214         /* compute multiplier, update diag(k) and U(i,k) */
2215         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
2216         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
2217         dk += uikdi * ba[ili];
2218         ba[ili] = uikdi; /* -U(i,k) */
2219 
2220         /* add multiple of row i to k-th row */
2221         jmin = ili + 1;
2222         jmax = bi[i + 1];
2223         if (jmin < jmax) {
2224           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2225           /* update il and jl for row i */
2226           il[i] = jmin;
2227           j     = bj[jmin];
2228           jl[i] = jl[j];
2229           jl[j] = i;
2230         }
2231         i = nexti;
2232       }
2233 
2234       /* shift the diagonals when zero pivot is detected */
2235       /* compute rs=sum of abs(off-diagonal) */
2236       rs   = 0.0;
2237       jmin = bi[k] + 1;
2238       nz   = bi[k + 1] - jmin;
2239       bcol = bj + jmin;
2240       for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);
2241 
2242       sctx.rs = rs;
2243       sctx.pv = dk;
2244       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
2245       if (sctx.newshift) break;
2246       dk = sctx.pv;
2247 
2248       /* copy data into U(k,:) */
2249       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
2250       jmin      = bi[k] + 1;
2251       jmax      = bi[k + 1];
2252       if (jmin < jmax) {
2253         for (j = jmin; j < jmax; j++) {
2254           col   = bj[j];
2255           ba[j] = rtmp[col];
2256         }
2257         /* add the k-th row into il and jl */
2258         il[k] = jmin;
2259         i     = bj[jmin];
2260         jl[k] = jl[i];
2261         jl[i] = k;
2262       }
2263     }
2264   } while (sctx.newshift);
2265 
2266   PetscCall(PetscFree3(rtmp, il, jl));
2267   PetscCall(ISRestoreIndices(ip, &rip));
2268   PetscCall(ISRestoreIndices(iip, &riip));
2269 
2270   PetscCall(ISIdentity(ip, &perm_identity));
2271   if (perm_identity) {
2272     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2273     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2274     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2275     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2276   } else {
2277     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2278     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2279     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2280     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2281   }
2282 
2283   C->assembled    = PETSC_TRUE;
2284   C->preallocated = PETSC_TRUE;
2285 
2286   PetscCall(PetscLogFlops(C->rmap->n));
2287   if (sctx.nshift) {
2288     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2289       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2290     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2291       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2292     }
2293   }
2294   PetscFunctionReturn(0);
2295 }
2296 
2297 /*
2298    icc() under revised new data structure.
2299    Factored arrays bj and ba are stored as
2300      U(0,:),...,U(i,:),U(n-1,:)
2301 
2302    ui=fact->i is an array of size n+1, in which
2303    ui+
2304      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2305      ui[n]:  points to U(n-1,n-1)+1
2306 
2307   udiag=fact->diag is an array of size n,in which
2308      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2309 
2310    U(i,:) contains udiag[i] as its last entry, i.e.,
2311     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2312 */
2313 
2314 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) {
2315   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2316   Mat_SeqSBAIJ      *b;
2317   PetscBool          perm_identity, missing;
2318   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2319   const PetscInt    *rip, *riip;
2320   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2321   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2322   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2323   PetscReal          fill = info->fill, levels = info->levels;
2324   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2325   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2326   PetscBT            lnkbt;
2327   IS                 iperm;
2328 
2329   PetscFunctionBegin;
2330   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2331   PetscCall(MatMissingDiagonal(A, &missing, &d));
2332   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2333   PetscCall(ISIdentity(perm, &perm_identity));
2334   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2335 
2336   PetscCall(PetscMalloc1(am + 1, &ui));
2337   PetscCall(PetscMalloc1(am + 1, &udiag));
2338   ui[0] = 0;
2339 
2340   /* ICC(0) without matrix ordering: simply rearrange column indices */
2341   if (!levels && perm_identity) {
2342     for (i = 0; i < am; i++) {
2343       ncols     = ai[i + 1] - a->diag[i];
2344       ui[i + 1] = ui[i] + ncols;
2345       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2346     }
2347     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2348     cols = uj;
2349     for (i = 0; i < am; i++) {
2350       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2351       ncols = ai[i + 1] - a->diag[i] - 1;
2352       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2353       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2354     }
2355   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2356     PetscCall(ISGetIndices(iperm, &riip));
2357     PetscCall(ISGetIndices(perm, &rip));
2358 
2359     /* initialization */
2360     PetscCall(PetscMalloc1(am + 1, &ajtmp));
2361 
2362     /* jl: linked list for storing indices of the pivot rows
2363        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2364     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2365     for (i = 0; i < am; i++) {
2366       jl[i] = am;
2367       il[i] = 0;
2368     }
2369 
2370     /* create and initialize a linked list for storing column indices of the active row k */
2371     nlnk = am + 1;
2372     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2373 
2374     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2375     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2376     current_space = free_space;
2377     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2378     current_space_lvl = free_space_lvl;
2379 
2380     for (k = 0; k < am; k++) { /* for each active row k */
2381       /* initialize lnk by the column indices of row rip[k] of A */
2382       nzk   = 0;
2383       ncols = ai[rip[k] + 1] - ai[rip[k]];
2384       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2385       ncols_upper = 0;
2386       for (j = 0; j < ncols; j++) {
2387         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2388         if (riip[i] >= k) {         /* only take upper triangular entry */
2389           ajtmp[ncols_upper] = i;
2390           ncols_upper++;
2391         }
2392       }
2393       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2394       nzk += nlnk;
2395 
2396       /* update lnk by computing fill-in for each pivot row to be merged in */
2397       prow = jl[k]; /* 1st pivot row */
2398 
2399       while (prow < k) {
2400         nextprow = jl[prow];
2401 
2402         /* merge prow into k-th row */
2403         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2404         jmax  = ui[prow + 1];
2405         ncols = jmax - jmin;
2406         i     = jmin - ui[prow];
2407         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2408         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2409         j     = *(uj - 1);
2410         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2411         nzk += nlnk;
2412 
2413         /* update il and jl for prow */
2414         if (jmin < jmax) {
2415           il[prow] = jmin;
2416           j        = *cols;
2417           jl[prow] = jl[j];
2418           jl[j]    = prow;
2419         }
2420         prow = nextprow;
2421       }
2422 
2423       /* if free space is not available, make more free space */
2424       if (current_space->local_remaining < nzk) {
2425         i = am - k + 1;                                    /* num of unfactored rows */
2426         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2427         PetscCall(PetscFreeSpaceGet(i, &current_space));
2428         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2429         reallocs++;
2430       }
2431 
2432       /* copy data into free_space and free_space_lvl, then initialize lnk */
2433       PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2434       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2435 
2436       /* add the k-th row into il and jl */
2437       if (nzk > 1) {
2438         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2439         jl[k] = jl[i];
2440         jl[i] = k;
2441         il[k] = ui[k] + 1;
2442       }
2443       uj_ptr[k]     = current_space->array;
2444       uj_lvl_ptr[k] = current_space_lvl->array;
2445 
2446       current_space->array += nzk;
2447       current_space->local_used += nzk;
2448       current_space->local_remaining -= nzk;
2449 
2450       current_space_lvl->array += nzk;
2451       current_space_lvl->local_used += nzk;
2452       current_space_lvl->local_remaining -= nzk;
2453 
2454       ui[k + 1] = ui[k] + nzk;
2455     }
2456 
2457     PetscCall(ISRestoreIndices(perm, &rip));
2458     PetscCall(ISRestoreIndices(iperm, &riip));
2459     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2460     PetscCall(PetscFree(ajtmp));
2461 
2462     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2463     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2464     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor  */
2465     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2466     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2467 
2468   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2469 
2470   /* put together the new matrix in MATSEQSBAIJ format */
2471   b               = (Mat_SeqSBAIJ *)(fact)->data;
2472   b->singlemalloc = PETSC_FALSE;
2473 
2474   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2475 
2476   b->j         = uj;
2477   b->i         = ui;
2478   b->diag      = udiag;
2479   b->free_diag = PETSC_TRUE;
2480   b->ilen      = NULL;
2481   b->imax      = NULL;
2482   b->row       = perm;
2483   b->col       = perm;
2484   PetscCall(PetscObjectReference((PetscObject)perm));
2485   PetscCall(PetscObjectReference((PetscObject)perm));
2486   b->icol          = iperm;
2487   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2488 
2489   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2490 
2491   b->maxnz = b->nz = ui[am];
2492   b->free_a        = PETSC_TRUE;
2493   b->free_ij       = PETSC_TRUE;
2494 
2495   fact->info.factor_mallocs   = reallocs;
2496   fact->info.fill_ratio_given = fill;
2497   if (ai[am] != 0) {
2498     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2499     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2500   } else {
2501     fact->info.fill_ratio_needed = 0.0;
2502   }
2503 #if defined(PETSC_USE_INFO)
2504   if (ai[am] != 0) {
2505     PetscReal af = fact->info.fill_ratio_needed;
2506     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2507     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2508     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2509   } else {
2510     PetscCall(PetscInfo(A, "Empty matrix\n"));
2511   }
2512 #endif
2513   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) {
2518   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2519   Mat_SeqSBAIJ      *b;
2520   PetscBool          perm_identity, missing;
2521   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2522   const PetscInt    *rip, *riip;
2523   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2524   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2525   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2526   PetscReal          fill = info->fill, levels = info->levels;
2527   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2528   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2529   PetscBT            lnkbt;
2530   IS                 iperm;
2531 
2532   PetscFunctionBegin;
2533   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2534   PetscCall(MatMissingDiagonal(A, &missing, &d));
2535   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2536   PetscCall(ISIdentity(perm, &perm_identity));
2537   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2538 
2539   PetscCall(PetscMalloc1(am + 1, &ui));
2540   PetscCall(PetscMalloc1(am + 1, &udiag));
2541   ui[0] = 0;
2542 
2543   /* ICC(0) without matrix ordering: simply copies fill pattern */
2544   if (!levels && perm_identity) {
2545     for (i = 0; i < am; i++) {
2546       ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
2547       udiag[i]  = ui[i];
2548     }
2549     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2550     cols = uj;
2551     for (i = 0; i < am; i++) {
2552       aj    = a->j + a->diag[i];
2553       ncols = ui[i + 1] - ui[i];
2554       for (j = 0; j < ncols; j++) *cols++ = *aj++;
2555     }
2556   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2557     PetscCall(ISGetIndices(iperm, &riip));
2558     PetscCall(ISGetIndices(perm, &rip));
2559 
2560     /* initialization */
2561     PetscCall(PetscMalloc1(am + 1, &ajtmp));
2562 
2563     /* jl: linked list for storing indices of the pivot rows
2564        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2565     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2566     for (i = 0; i < am; i++) {
2567       jl[i] = am;
2568       il[i] = 0;
2569     }
2570 
2571     /* create and initialize a linked list for storing column indices of the active row k */
2572     nlnk = am + 1;
2573     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2574 
2575     /* initial FreeSpace size is fill*(ai[am]+1) */
2576     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2577     current_space = free_space;
2578     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl));
2579     current_space_lvl = free_space_lvl;
2580 
2581     for (k = 0; k < am; k++) { /* for each active row k */
2582       /* initialize lnk by the column indices of row rip[k] of A */
2583       nzk   = 0;
2584       ncols = ai[rip[k] + 1] - ai[rip[k]];
2585       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2586       ncols_upper = 0;
2587       for (j = 0; j < ncols; j++) {
2588         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2589         if (riip[i] >= k) {         /* only take upper triangular entry */
2590           ajtmp[ncols_upper] = i;
2591           ncols_upper++;
2592         }
2593       }
2594       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2595       nzk += nlnk;
2596 
2597       /* update lnk by computing fill-in for each pivot row to be merged in */
2598       prow = jl[k]; /* 1st pivot row */
2599 
2600       while (prow < k) {
2601         nextprow = jl[prow];
2602 
2603         /* merge prow into k-th row */
2604         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2605         jmax  = ui[prow + 1];
2606         ncols = jmax - jmin;
2607         i     = jmin - ui[prow];
2608         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2609         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2610         j     = *(uj - 1);
2611         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2612         nzk += nlnk;
2613 
2614         /* update il and jl for prow */
2615         if (jmin < jmax) {
2616           il[prow] = jmin;
2617           j        = *cols;
2618           jl[prow] = jl[j];
2619           jl[j]    = prow;
2620         }
2621         prow = nextprow;
2622       }
2623 
2624       /* if free space is not available, make more free space */
2625       if (current_space->local_remaining < nzk) {
2626         i = am - k + 1;                                    /* num of unfactored rows */
2627         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2628         PetscCall(PetscFreeSpaceGet(i, &current_space));
2629         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2630         reallocs++;
2631       }
2632 
2633       /* copy data into free_space and free_space_lvl, then initialize lnk */
2634       PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2635       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2636 
2637       /* add the k-th row into il and jl */
2638       if (nzk > 1) {
2639         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2640         jl[k] = jl[i];
2641         jl[i] = k;
2642         il[k] = ui[k] + 1;
2643       }
2644       uj_ptr[k]     = current_space->array;
2645       uj_lvl_ptr[k] = current_space_lvl->array;
2646 
2647       current_space->array += nzk;
2648       current_space->local_used += nzk;
2649       current_space->local_remaining -= nzk;
2650 
2651       current_space_lvl->array += nzk;
2652       current_space_lvl->local_used += nzk;
2653       current_space_lvl->local_remaining -= nzk;
2654 
2655       ui[k + 1] = ui[k] + nzk;
2656     }
2657 
2658 #if defined(PETSC_USE_INFO)
2659     if (ai[am] != 0) {
2660       PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
2661       PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2662       PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2663       PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2664     } else {
2665       PetscCall(PetscInfo(A, "Empty matrix\n"));
2666     }
2667 #endif
2668 
2669     PetscCall(ISRestoreIndices(perm, &rip));
2670     PetscCall(ISRestoreIndices(iperm, &riip));
2671     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2672     PetscCall(PetscFree(ajtmp));
2673 
2674     /* destroy list of free space and other temporary array(s) */
2675     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2676     PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
2677     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2678     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2679 
2680   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2681 
2682   /* put together the new matrix in MATSEQSBAIJ format */
2683 
2684   b               = (Mat_SeqSBAIJ *)fact->data;
2685   b->singlemalloc = PETSC_FALSE;
2686 
2687   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2688 
2689   b->j         = uj;
2690   b->i         = ui;
2691   b->diag      = udiag;
2692   b->free_diag = PETSC_TRUE;
2693   b->ilen      = NULL;
2694   b->imax      = NULL;
2695   b->row       = perm;
2696   b->col       = perm;
2697 
2698   PetscCall(PetscObjectReference((PetscObject)perm));
2699   PetscCall(PetscObjectReference((PetscObject)perm));
2700 
2701   b->icol          = iperm;
2702   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2703   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2704   b->maxnz = b->nz = ui[am];
2705   b->free_a        = PETSC_TRUE;
2706   b->free_ij       = PETSC_TRUE;
2707 
2708   fact->info.factor_mallocs   = reallocs;
2709   fact->info.fill_ratio_given = fill;
2710   if (ai[am] != 0) {
2711     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
2712   } else {
2713     fact->info.fill_ratio_needed = 0.0;
2714   }
2715   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2716   PetscFunctionReturn(0);
2717 }
2718 
2719 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) {
2720   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2721   Mat_SeqSBAIJ      *b;
2722   PetscBool          perm_identity, missing;
2723   PetscReal          fill = info->fill;
2724   const PetscInt    *rip, *riip;
2725   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2726   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2727   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2728   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2729   PetscBT            lnkbt;
2730   IS                 iperm;
2731 
2732   PetscFunctionBegin;
2733   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2734   PetscCall(MatMissingDiagonal(A, &missing, &i));
2735   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2736 
2737   /* check whether perm is the identity mapping */
2738   PetscCall(ISIdentity(perm, &perm_identity));
2739   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2740   PetscCall(ISGetIndices(iperm, &riip));
2741   PetscCall(ISGetIndices(perm, &rip));
2742 
2743   /* initialization */
2744   PetscCall(PetscMalloc1(am + 1, &ui));
2745   PetscCall(PetscMalloc1(am + 1, &udiag));
2746   ui[0] = 0;
2747 
2748   /* jl: linked list for storing indices of the pivot rows
2749      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2750   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2751   for (i = 0; i < am; i++) {
2752     jl[i] = am;
2753     il[i] = 0;
2754   }
2755 
2756   /* create and initialize a linked list for storing column indices of the active row k */
2757   nlnk = am + 1;
2758   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2759 
2760   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2761   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2762   current_space = free_space;
2763 
2764   for (k = 0; k < am; k++) { /* for each active row k */
2765     /* initialize lnk by the column indices of row rip[k] of A */
2766     nzk   = 0;
2767     ncols = ai[rip[k] + 1] - ai[rip[k]];
2768     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2769     ncols_upper = 0;
2770     for (j = 0; j < ncols; j++) {
2771       i = riip[*(aj + ai[rip[k]] + j)];
2772       if (i >= k) { /* only take upper triangular entry */
2773         cols[ncols_upper] = i;
2774         ncols_upper++;
2775       }
2776     }
2777     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2778     nzk += nlnk;
2779 
2780     /* update lnk by computing fill-in for each pivot row to be merged in */
2781     prow = jl[k]; /* 1st pivot row */
2782 
2783     while (prow < k) {
2784       nextprow = jl[prow];
2785       /* merge prow into k-th row */
2786       jmin     = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2787       jmax     = ui[prow + 1];
2788       ncols    = jmax - jmin;
2789       uj_ptr   = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2790       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2791       nzk += nlnk;
2792 
2793       /* update il and jl for prow */
2794       if (jmin < jmax) {
2795         il[prow] = jmin;
2796         j        = *uj_ptr;
2797         jl[prow] = jl[j];
2798         jl[j]    = prow;
2799       }
2800       prow = nextprow;
2801     }
2802 
2803     /* if free space is not available, make more free space */
2804     if (current_space->local_remaining < nzk) {
2805       i = am - k + 1;                                    /* num of unfactored rows */
2806       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2807       PetscCall(PetscFreeSpaceGet(i, &current_space));
2808       reallocs++;
2809     }
2810 
2811     /* copy data into free space, then initialize lnk */
2812     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
2813 
2814     /* add the k-th row into il and jl */
2815     if (nzk > 1) {
2816       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2817       jl[k] = jl[i];
2818       jl[i] = k;
2819       il[k] = ui[k] + 1;
2820     }
2821     ui_ptr[k] = current_space->array;
2822 
2823     current_space->array += nzk;
2824     current_space->local_used += nzk;
2825     current_space->local_remaining -= nzk;
2826 
2827     ui[k + 1] = ui[k] + nzk;
2828   }
2829 
2830   PetscCall(ISRestoreIndices(perm, &rip));
2831   PetscCall(ISRestoreIndices(iperm, &riip));
2832   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
2833 
2834   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2835   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2836   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2837   PetscCall(PetscLLDestroy(lnk, lnkbt));
2838 
2839   /* put together the new matrix in MATSEQSBAIJ format */
2840 
2841   b               = (Mat_SeqSBAIJ *)fact->data;
2842   b->singlemalloc = PETSC_FALSE;
2843   b->free_a       = PETSC_TRUE;
2844   b->free_ij      = PETSC_TRUE;
2845 
2846   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2847 
2848   b->j         = uj;
2849   b->i         = ui;
2850   b->diag      = udiag;
2851   b->free_diag = PETSC_TRUE;
2852   b->ilen      = NULL;
2853   b->imax      = NULL;
2854   b->row       = perm;
2855   b->col       = perm;
2856 
2857   PetscCall(PetscObjectReference((PetscObject)perm));
2858   PetscCall(PetscObjectReference((PetscObject)perm));
2859 
2860   b->icol          = iperm;
2861   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2862 
2863   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2864 
2865   b->maxnz = b->nz = ui[am];
2866 
2867   fact->info.factor_mallocs   = reallocs;
2868   fact->info.fill_ratio_given = fill;
2869   if (ai[am] != 0) {
2870     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2871     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2872   } else {
2873     fact->info.fill_ratio_needed = 0.0;
2874   }
2875 #if defined(PETSC_USE_INFO)
2876   if (ai[am] != 0) {
2877     PetscReal af = fact->info.fill_ratio_needed;
2878     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2879     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2880     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2881   } else {
2882     PetscCall(PetscInfo(A, "Empty matrix\n"));
2883   }
2884 #endif
2885   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2886   PetscFunctionReturn(0);
2887 }
2888 
2889 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) {
2890   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2891   Mat_SeqSBAIJ      *b;
2892   PetscBool          perm_identity, missing;
2893   PetscReal          fill = info->fill;
2894   const PetscInt    *rip, *riip;
2895   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2896   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2897   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
2898   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2899   PetscBT            lnkbt;
2900   IS                 iperm;
2901 
2902   PetscFunctionBegin;
2903   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2904   PetscCall(MatMissingDiagonal(A, &missing, &i));
2905   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2906 
2907   /* check whether perm is the identity mapping */
2908   PetscCall(ISIdentity(perm, &perm_identity));
2909   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2910   PetscCall(ISGetIndices(iperm, &riip));
2911   PetscCall(ISGetIndices(perm, &rip));
2912 
2913   /* initialization */
2914   PetscCall(PetscMalloc1(am + 1, &ui));
2915   ui[0] = 0;
2916 
2917   /* jl: linked list for storing indices of the pivot rows
2918      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2919   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2920   for (i = 0; i < am; i++) {
2921     jl[i] = am;
2922     il[i] = 0;
2923   }
2924 
2925   /* create and initialize a linked list for storing column indices of the active row k */
2926   nlnk = am + 1;
2927   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2928 
2929   /* initial FreeSpace size is fill*(ai[am]+1) */
2930   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2931   current_space = free_space;
2932 
2933   for (k = 0; k < am; k++) { /* for each active row k */
2934     /* initialize lnk by the column indices of row rip[k] of A */
2935     nzk   = 0;
2936     ncols = ai[rip[k] + 1] - ai[rip[k]];
2937     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2938     ncols_upper = 0;
2939     for (j = 0; j < ncols; j++) {
2940       i = riip[*(aj + ai[rip[k]] + j)];
2941       if (i >= k) { /* only take upper triangular entry */
2942         cols[ncols_upper] = i;
2943         ncols_upper++;
2944       }
2945     }
2946     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2947     nzk += nlnk;
2948 
2949     /* update lnk by computing fill-in for each pivot row to be merged in */
2950     prow = jl[k]; /* 1st pivot row */
2951 
2952     while (prow < k) {
2953       nextprow = jl[prow];
2954       /* merge prow into k-th row */
2955       jmin     = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2956       jmax     = ui[prow + 1];
2957       ncols    = jmax - jmin;
2958       uj_ptr   = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2959       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2960       nzk += nlnk;
2961 
2962       /* update il and jl for prow */
2963       if (jmin < jmax) {
2964         il[prow] = jmin;
2965         j        = *uj_ptr;
2966         jl[prow] = jl[j];
2967         jl[j]    = prow;
2968       }
2969       prow = nextprow;
2970     }
2971 
2972     /* if free space is not available, make more free space */
2973     if (current_space->local_remaining < nzk) {
2974       i = am - k + 1;                     /* num of unfactored rows */
2975       i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2976       PetscCall(PetscFreeSpaceGet(i, &current_space));
2977       reallocs++;
2978     }
2979 
2980     /* copy data into free space, then initialize lnk */
2981     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
2982 
2983     /* add the k-th row into il and jl */
2984     if (nzk - 1 > 0) {
2985       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2986       jl[k] = jl[i];
2987       jl[i] = k;
2988       il[k] = ui[k] + 1;
2989     }
2990     ui_ptr[k] = current_space->array;
2991 
2992     current_space->array += nzk;
2993     current_space->local_used += nzk;
2994     current_space->local_remaining -= nzk;
2995 
2996     ui[k + 1] = ui[k] + nzk;
2997   }
2998 
2999 #if defined(PETSC_USE_INFO)
3000   if (ai[am] != 0) {
3001     PetscReal af = (PetscReal)(ui[am]) / ((PetscReal)ai[am]);
3002     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
3003     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
3004     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
3005   } else {
3006     PetscCall(PetscInfo(A, "Empty matrix\n"));
3007   }
3008 #endif
3009 
3010   PetscCall(ISRestoreIndices(perm, &rip));
3011   PetscCall(ISRestoreIndices(iperm, &riip));
3012   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
3013 
3014   /* destroy list of free space and other temporary array(s) */
3015   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
3016   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
3017   PetscCall(PetscLLDestroy(lnk, lnkbt));
3018 
3019   /* put together the new matrix in MATSEQSBAIJ format */
3020 
3021   b               = (Mat_SeqSBAIJ *)fact->data;
3022   b->singlemalloc = PETSC_FALSE;
3023   b->free_a       = PETSC_TRUE;
3024   b->free_ij      = PETSC_TRUE;
3025 
3026   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
3027 
3028   b->j    = uj;
3029   b->i    = ui;
3030   b->diag = NULL;
3031   b->ilen = NULL;
3032   b->imax = NULL;
3033   b->row  = perm;
3034   b->col  = perm;
3035 
3036   PetscCall(PetscObjectReference((PetscObject)perm));
3037   PetscCall(PetscObjectReference((PetscObject)perm));
3038 
3039   b->icol          = iperm;
3040   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3041 
3042   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
3043   b->maxnz = b->nz = ui[am];
3044 
3045   fact->info.factor_mallocs   = reallocs;
3046   fact->info.fill_ratio_given = fill;
3047   if (ai[am] != 0) {
3048     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
3049   } else {
3050     fact->info.fill_ratio_needed = 0.0;
3051   }
3052   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3053   PetscFunctionReturn(0);
3054 }
3055 
3056 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx) {
3057   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
3058   PetscInt           n  = A->rmap->n;
3059   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3060   PetscScalar       *x, sum;
3061   const PetscScalar *b;
3062   const MatScalar   *aa = a->a, *v;
3063   PetscInt           i, nz;
3064 
3065   PetscFunctionBegin;
3066   if (!n) PetscFunctionReturn(0);
3067 
3068   PetscCall(VecGetArrayRead(bb, &b));
3069   PetscCall(VecGetArrayWrite(xx, &x));
3070 
3071   /* forward solve the lower triangular */
3072   x[0] = b[0];
3073   v    = aa;
3074   vi   = aj;
3075   for (i = 1; i < n; i++) {
3076     nz  = ai[i + 1] - ai[i];
3077     sum = b[i];
3078     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3079     v += nz;
3080     vi += nz;
3081     x[i] = sum;
3082   }
3083 
3084   /* backward solve the upper triangular */
3085   for (i = n - 1; i >= 0; i--) {
3086     v   = aa + adiag[i + 1] + 1;
3087     vi  = aj + adiag[i + 1] + 1;
3088     nz  = adiag[i] - adiag[i + 1] - 1;
3089     sum = x[i];
3090     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3091     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3092   }
3093 
3094   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3095   PetscCall(VecRestoreArrayRead(bb, &b));
3096   PetscCall(VecRestoreArrayWrite(xx, &x));
3097   PetscFunctionReturn(0);
3098 }
3099 
3100 PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx) {
3101   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
3102   IS                 iscol = a->col, isrow = a->row;
3103   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3104   const PetscInt    *rout, *cout, *r, *c;
3105   PetscScalar       *x, *tmp, sum;
3106   const PetscScalar *b;
3107   const MatScalar   *aa = a->a, *v;
3108 
3109   PetscFunctionBegin;
3110   if (!n) PetscFunctionReturn(0);
3111 
3112   PetscCall(VecGetArrayRead(bb, &b));
3113   PetscCall(VecGetArrayWrite(xx, &x));
3114   tmp = a->solve_work;
3115 
3116   PetscCall(ISGetIndices(isrow, &rout));
3117   r = rout;
3118   PetscCall(ISGetIndices(iscol, &cout));
3119   c = cout;
3120 
3121   /* forward solve the lower triangular */
3122   tmp[0] = b[r[0]];
3123   v      = aa;
3124   vi     = aj;
3125   for (i = 1; i < n; i++) {
3126     nz  = ai[i + 1] - ai[i];
3127     sum = b[r[i]];
3128     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3129     tmp[i] = sum;
3130     v += nz;
3131     vi += nz;
3132   }
3133 
3134   /* backward solve the upper triangular */
3135   for (i = n - 1; i >= 0; i--) {
3136     v   = aa + adiag[i + 1] + 1;
3137     vi  = aj + adiag[i + 1] + 1;
3138     nz  = adiag[i] - adiag[i + 1] - 1;
3139     sum = tmp[i];
3140     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3141     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3142   }
3143 
3144   PetscCall(ISRestoreIndices(isrow, &rout));
3145   PetscCall(ISRestoreIndices(iscol, &cout));
3146   PetscCall(VecRestoreArrayRead(bb, &b));
3147   PetscCall(VecRestoreArrayWrite(xx, &x));
3148   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3149   PetscFunctionReturn(0);
3150 }
3151 
3152 /*
3153     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3154 */
3155 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact) {
3156   Mat             B = *fact;
3157   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b;
3158   IS              isicol;
3159   const PetscInt *r, *ic;
3160   PetscInt        i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3161   PetscInt       *bi, *bj, *bdiag, *bdiag_rev;
3162   PetscInt        row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
3163   PetscInt        nlnk, *lnk;
3164   PetscBT         lnkbt;
3165   PetscBool       row_identity, icol_identity;
3166   MatScalar      *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
3167   const PetscInt *ics;
3168   PetscInt        j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3169   PetscReal       dt = info->dt, shift = info->shiftamount;
3170   PetscInt        dtcount = (PetscInt)info->dtcount, nnz_max;
3171   PetscBool       missing;
3172 
3173   PetscFunctionBegin;
3174   if (dt == PETSC_DEFAULT) dt = 0.005;
3175   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);
3176 
3177   /* ------- symbolic factorization, can be reused ---------*/
3178   PetscCall(MatMissingDiagonal(A, &missing, &i));
3179   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
3180   adiag = a->diag;
3181 
3182   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
3183 
3184   /* bdiag is location of diagonal in factor */
3185   PetscCall(PetscMalloc1(n + 1, &bdiag));     /* becomes b->diag */
3186   PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */
3187 
3188   /* allocate row pointers bi */
3189   PetscCall(PetscMalloc1(2 * n + 2, &bi));
3190 
3191   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3192   if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
3193   nnz_max = ai[n] + 2 * n * dtcount + 2;
3194 
3195   PetscCall(PetscMalloc1(nnz_max + 1, &bj));
3196   PetscCall(PetscMalloc1(nnz_max + 1, &ba));
3197 
3198   /* put together the new matrix */
3199   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3200   b = (Mat_SeqAIJ *)B->data;
3201 
3202   b->free_a       = PETSC_TRUE;
3203   b->free_ij      = PETSC_TRUE;
3204   b->singlemalloc = PETSC_FALSE;
3205 
3206   b->a    = ba;
3207   b->j    = bj;
3208   b->i    = bi;
3209   b->diag = bdiag;
3210   b->ilen = NULL;
3211   b->imax = NULL;
3212   b->row  = isrow;
3213   b->col  = iscol;
3214   PetscCall(PetscObjectReference((PetscObject)isrow));
3215   PetscCall(PetscObjectReference((PetscObject)iscol));
3216   b->icol = isicol;
3217 
3218   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
3219   b->maxnz = nnz_max;
3220 
3221   B->factortype            = MAT_FACTOR_ILUDT;
3222   B->info.factor_mallocs   = 0;
3223   B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
3224   /* ------- end of symbolic factorization ---------*/
3225 
3226   PetscCall(ISGetIndices(isrow, &r));
3227   PetscCall(ISGetIndices(isicol, &ic));
3228   ics = ic;
3229 
3230   /* linked list for storing column indices of the active row */
3231   nlnk = n + 1;
3232   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
3233 
3234   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3235   PetscCall(PetscMalloc2(n, &im, n, &jtmp));
3236   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3237   PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp));
3238   PetscCall(PetscArrayzero(rtmp, n));
3239 
3240   bi[0]         = 0;
3241   bdiag[0]      = nnz_max - 1; /* location of diag[0] in factor B */
3242   bdiag_rev[n]  = bdiag[0];
3243   bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3244   for (i = 0; i < n; i++) {
3245     /* copy initial fill into linked list */
3246     nzi = ai[r[i] + 1] - ai[r[i]];
3247     PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
3248     nzi_al = adiag[r[i]] - ai[r[i]];
3249     nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3250     ajtmp  = aj + ai[r[i]];
3251     PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt));
3252 
3253     /* load in initial (unfactored row) */
3254     aatmp = a->a + ai[r[i]];
3255     for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++;
3256 
3257     /* add pivot rows into linked list */
3258     row = lnk[n];
3259     while (row < i) {
3260       nzi_bl = bi[row + 1] - bi[row] + 1;
3261       bjtmp  = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
3262       PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im));
3263       nzi += nlnk;
3264       row = lnk[row];
3265     }
3266 
3267     /* copy data from lnk into jtmp, then initialize lnk */
3268     PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt));
3269 
3270     /* numerical factorization */
3271     bjtmp = jtmp;
3272     row   = *bjtmp++; /* 1st pivot row */
3273     while (row < i) {
3274       pc         = rtmp + row;
3275       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3276       multiplier = (*pc) * (*pv);
3277       *pc        = multiplier;
3278       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3279         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3280         pv = ba + bdiag[row + 1] + 1;
3281         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3282         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3283         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3284       }
3285       row = *bjtmp++;
3286     }
3287 
3288     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3289     diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3290     nzi_bl   = 0;
3291     j        = 0;
3292     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3293       vtmp[j]       = rtmp[jtmp[j]];
3294       rtmp[jtmp[j]] = 0.0;
3295       nzi_bl++;
3296       j++;
3297     }
3298     nzi_bu = nzi - nzi_bl - 1;
3299     while (j < nzi) {
3300       vtmp[j]       = rtmp[jtmp[j]];
3301       rtmp[jtmp[j]] = 0.0;
3302       j++;
3303     }
3304 
3305     bjtmp = bj + bi[i];
3306     batmp = ba + bi[i];
3307     /* apply level dropping rule to L part */
3308     ncut  = nzi_al + dtcount;
3309     if (ncut < nzi_bl) {
3310       PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp));
3311       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp));
3312     } else {
3313       ncut = nzi_bl;
3314     }
3315     for (j = 0; j < ncut; j++) {
3316       bjtmp[j] = jtmp[j];
3317       batmp[j] = vtmp[j];
3318     }
3319     bi[i + 1] = bi[i] + ncut;
3320     nzi       = ncut + 1;
3321 
3322     /* apply level dropping rule to U part */
3323     ncut = nzi_au + dtcount;
3324     if (ncut < nzi_bu) {
3325       PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1));
3326       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1));
3327     } else {
3328       ncut = nzi_bu;
3329     }
3330     nzi += ncut;
3331 
3332     /* mark bdiagonal */
3333     bdiag[i + 1]         = bdiag[i] - (ncut + 1);
3334     bdiag_rev[n - i - 1] = bdiag[i + 1];
3335     bi[2 * n - i]        = bi[2 * n - i + 1] - (ncut + 1);
3336     bjtmp                = bj + bdiag[i];
3337     batmp                = ba + bdiag[i];
3338     *bjtmp               = i;
3339     *batmp               = diag_tmp; /* rtmp[i]; */
3340     if (*batmp == 0.0) *batmp = dt + shift;
3341     *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */
3342 
3343     bjtmp = bj + bdiag[i + 1] + 1;
3344     batmp = ba + bdiag[i + 1] + 1;
3345     for (k = 0; k < ncut; k++) {
3346       bjtmp[k] = jtmp[nzi_bl + 1 + k];
3347       batmp[k] = vtmp[nzi_bl + 1 + k];
3348     }
3349 
3350     im[i] = nzi; /* used by PetscLLAddSortedLU() */
3351   }              /* for (i=0; i<n; i++) */
3352   PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]);
3353 
3354   PetscCall(ISRestoreIndices(isrow, &r));
3355   PetscCall(ISRestoreIndices(isicol, &ic));
3356 
3357   PetscCall(PetscLLDestroy(lnk, lnkbt));
3358   PetscCall(PetscFree2(im, jtmp));
3359   PetscCall(PetscFree2(rtmp, vtmp));
3360   PetscCall(PetscFree(bdiag_rev));
3361 
3362   PetscCall(PetscLogFlops(B->cmap->n));
3363   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3364 
3365   PetscCall(ISIdentity(isrow, &row_identity));
3366   PetscCall(ISIdentity(isicol, &icol_identity));
3367   if (row_identity && icol_identity) {
3368     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3369   } else {
3370     B->ops->solve = MatSolve_SeqAIJ;
3371   }
3372 
3373   B->ops->solveadd          = NULL;
3374   B->ops->solvetranspose    = NULL;
3375   B->ops->solvetransposeadd = NULL;
3376   B->ops->matsolve          = NULL;
3377   B->assembled              = PETSC_TRUE;
3378   B->preallocated           = PETSC_TRUE;
3379   PetscFunctionReturn(0);
3380 }
3381 
3382 /* a wraper of MatILUDTFactor_SeqAIJ() */
3383 /*
3384     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3385 */
3386 
3387 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) {
3388   PetscFunctionBegin;
3389   PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact));
3390   PetscFunctionReturn(0);
3391 }
3392 
3393 /*
3394    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3395    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3396 */
3397 /*
3398     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3399 */
3400 
3401 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info) {
3402   Mat             C = fact;
3403   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
3404   IS              isrow = b->row, isicol = b->icol;
3405   const PetscInt *r, *ic, *ics;
3406   PetscInt        i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3407   PetscInt       *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
3408   MatScalar      *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3409   PetscReal       dt = info->dt, shift = info->shiftamount;
3410   PetscBool       row_identity, col_identity;
3411 
3412   PetscFunctionBegin;
3413   PetscCall(ISGetIndices(isrow, &r));
3414   PetscCall(ISGetIndices(isicol, &ic));
3415   PetscCall(PetscMalloc1(n + 1, &rtmp));
3416   ics = ic;
3417 
3418   for (i = 0; i < n; i++) {
3419     /* initialize rtmp array */
3420     nzl   = bi[i + 1] - bi[i]; /* num of nozeros in L(i,:) */
3421     bjtmp = bj + bi[i];
3422     for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3423     rtmp[i] = 0.0;
3424     nzu     = bdiag[i] - bdiag[i + 1]; /* num of nozeros in U(i,:) */
3425     bjtmp   = bj + bdiag[i + 1] + 1;
3426     for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;
3427 
3428     /* load in initial unfactored row of A */
3429     nz    = ai[r[i] + 1] - ai[r[i]];
3430     ajtmp = aj + ai[r[i]];
3431     v     = aa + ai[r[i]];
3432     for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j];
3433 
3434     /* numerical factorization */
3435     bjtmp = bj + bi[i];        /* point to 1st entry of L(i,:) */
3436     nzl   = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3437     k     = 0;
3438     while (k < nzl) {
3439       row        = *bjtmp++;
3440       pc         = rtmp + row;
3441       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3442       multiplier = (*pc) * (*pv);
3443       *pc        = multiplier;
3444       if (PetscAbsScalar(multiplier) > dt) {
3445         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3446         pv = b->a + bdiag[row + 1] + 1;
3447         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3448         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3449         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3450       }
3451       k++;
3452     }
3453 
3454     /* finished row so stick it into b->a */
3455     /* L-part */
3456     pv  = b->a + bi[i];
3457     pj  = bj + bi[i];
3458     nzl = bi[i + 1] - bi[i];
3459     for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]];
3460 
3461     /* diagonal: invert diagonal entries for simpler triangular solves */
3462     if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3463     b->a[bdiag[i]] = 1.0 / rtmp[i];
3464 
3465     /* U-part */
3466     pv  = b->a + bdiag[i + 1] + 1;
3467     pj  = bj + bdiag[i + 1] + 1;
3468     nzu = bdiag[i] - bdiag[i + 1] - 1;
3469     for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]];
3470   }
3471 
3472   PetscCall(PetscFree(rtmp));
3473   PetscCall(ISRestoreIndices(isicol, &ic));
3474   PetscCall(ISRestoreIndices(isrow, &r));
3475 
3476   PetscCall(ISIdentity(isrow, &row_identity));
3477   PetscCall(ISIdentity(isicol, &col_identity));
3478   if (row_identity && col_identity) {
3479     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3480   } else {
3481     C->ops->solve = MatSolve_SeqAIJ;
3482   }
3483   C->ops->solveadd          = NULL;
3484   C->ops->solvetranspose    = NULL;
3485   C->ops->solvetransposeadd = NULL;
3486   C->ops->matsolve          = NULL;
3487   C->assembled              = PETSC_TRUE;
3488   C->preallocated           = PETSC_TRUE;
3489 
3490   PetscCall(PetscLogFlops(C->cmap->n));
3491   PetscFunctionReturn(0);
3492 }
3493