xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 21e3ffae2f3b73c0bd738cf6d0a809700fc04bb0)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscbt.h>
5 #include <../src/mat/utils/freespace.h>
6 
7 /*
8       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
9 
10       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
11 */
12 #if 0
13 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol)
14 {
15   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)mat->data;
16   PetscInt           i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
17   const PetscInt    *ai = a->i, *aj = a->j;
18   const PetscScalar *aa = a->a;
19   PetscBool         *done;
20   PetscReal          best, past = 0, future;
21 
22   PetscFunctionBegin;
23   /* pick initial row */
24   best = -1;
25   for (i = 0; i < n; i++) {
26     future = 0.0;
27     for (j = ai[i]; j < ai[i + 1]; j++) {
28       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
29       else past = PetscAbsScalar(aa[j]);
30     }
31     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
32     if (past / future > best) {
33       best    = past / future;
34       current = i;
35     }
36   }
37 
38   PetscCall(PetscMalloc1(n, &done));
39   PetscCall(PetscArrayzero(done, n));
40   PetscCall(PetscMalloc1(n, &order));
41   order[0] = current;
42   for (i = 0; i < n - 1; i++) {
43     done[current] = PETSC_TRUE;
44     best          = -1;
45     /* loop over all neighbors of current pivot */
46     for (j = ai[current]; j < ai[current + 1]; j++) {
47       jj = aj[j];
48       if (done[jj]) continue;
49       /* loop over columns of potential next row computing weights for below and above diagonal */
50       past = future = 0.0;
51       for (k = ai[jj]; k < ai[jj + 1]; k++) {
52         kk = aj[k];
53         if (done[kk]) past += PetscAbsScalar(aa[k]);
54         else if (kk != jj) future += PetscAbsScalar(aa[k]);
55       }
56       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
57       if (past / future > best) {
58         best       = past / future;
59         newcurrent = jj;
60       }
61     }
62     if (best == -1) { /* no neighbors to select from so select best of all that remain */
63       best = -1;
64       for (k = 0; k < n; k++) {
65         if (done[k]) continue;
66         future = 0.0;
67         past   = 0.0;
68         for (j = ai[k]; j < ai[k + 1]; j++) {
69           kk = aj[j];
70           if (done[kk]) past += PetscAbsScalar(aa[j]);
71           else if (kk != k) future += PetscAbsScalar(aa[j]);
72         }
73         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
74         if (past / future > best) {
75           best       = past / future;
76           newcurrent = k;
77         }
78       }
79     }
80     PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current");
81     current      = newcurrent;
82     order[i + 1] = current;
83   }
84   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow));
85   *icol = *irow;
86   PetscCall(PetscObjectReference((PetscObject)*irow));
87   PetscCall(PetscFree(done));
88   PetscCall(PetscFree(order));
89   PetscFunctionReturn(PETSC_SUCCESS);
90 }
91 #endif
92 
93 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
94 {
95   PetscFunctionBegin;
96   *type = MATSOLVERPETSC;
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
99 
100 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
101 {
102   PetscInt n = A->rmap->n;
103 
104   PetscFunctionBegin;
105 #if defined(PETSC_USE_COMPLEX)
106   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || (ftype != MAT_FACTOR_CHOLESKY && ftype != MAT_FACTOR_ICC), PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY or ICC Factor is not supported");
107 #endif
108   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
109   PetscCall(MatSetSizes(*B, n, n, n, n));
110   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
111     PetscCall(MatSetType(*B, MATSEQAIJ));
112 
113     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
114     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
115 
116     PetscCall(MatSetBlockSizesFromMats(*B, A, A));
117     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
118     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
119     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
120   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
121     PetscCall(MatSetType(*B, MATSEQSBAIJ));
122     PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
123 
124     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
125     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
126     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
127     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
128   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
129   (*B)->factortype = ftype;
130 
131   PetscCall(PetscFree((*B)->solvertype));
132   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
133   (*B)->canuseordering = PETSC_TRUE;
134   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
135   PetscFunctionReturn(PETSC_SUCCESS);
136 }
137 
138 #if 0
139 // currently unused
140 static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
141 {
142   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
143   IS                 isicol;
144   const PetscInt    *r, *ic;
145   PetscInt           i, n = A->rmap->n, *ai = a->i, *aj = a->j;
146   PetscInt          *bi, *bj, *ajtmp;
147   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
148   PetscReal          f;
149   PetscInt           nlnk, *lnk, k, **bi_ptr;
150   PetscFreeSpaceList free_space = NULL, current_space = NULL;
151   PetscBT            lnkbt;
152   PetscBool          missing;
153 
154   PetscFunctionBegin;
155   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
156   PetscCall(MatMissingDiagonal(A, &missing, &i));
157   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
158 
159   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
160   PetscCall(ISGetIndices(isrow, &r));
161   PetscCall(ISGetIndices(isicol, &ic));
162 
163   /* get new row pointers */
164   PetscCall(PetscMalloc1(n + 1, &bi));
165   bi[0] = 0;
166 
167   /* bdiag is location of diagonal in factor */
168   PetscCall(PetscMalloc1(n + 1, &bdiag));
169   bdiag[0] = 0;
170 
171   /* linked list for storing column indices of the active row */
172   nlnk = n + 1;
173   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
174 
175   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
176 
177   /* initial FreeSpace size is f*(ai[n]+1) */
178   f = info->fill;
179   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
180   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
181   current_space = free_space;
182 
183   for (i = 0; i < n; i++) {
184     /* copy previous fill into linked list */
185     nzi   = 0;
186     nnz   = ai[r[i] + 1] - ai[r[i]];
187     ajtmp = aj + ai[r[i]];
188     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
189     nzi += nlnk;
190 
191     /* add pivot rows into linked list */
192     row = lnk[n];
193     while (row < i) {
194       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
195       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
196       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
197       nzi += nlnk;
198       row = lnk[row];
199     }
200     bi[i + 1] = bi[i] + nzi;
201     im[i]     = nzi;
202 
203     /* mark bdiag */
204     nzbd = 0;
205     nnz  = nzi;
206     k    = lnk[n];
207     while (nnz-- && k < i) {
208       nzbd++;
209       k = lnk[k];
210     }
211     bdiag[i] = bi[i] + nzbd;
212 
213     /* if free space is not available, make more free space */
214     if (current_space->local_remaining < nzi) {
215       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
216       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
217       reallocs++;
218     }
219 
220     /* copy data into free space, then initialize lnk */
221     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
222 
223     bi_ptr[i] = current_space->array;
224     current_space->array += nzi;
225     current_space->local_used += nzi;
226     current_space->local_remaining -= nzi;
227   }
228   #if defined(PETSC_USE_INFO)
229   if (ai[n] != 0) {
230     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
231     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
232     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
233     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
234     PetscCall(PetscInfo(A, "for best performance.\n"));
235   } else {
236     PetscCall(PetscInfo(A, "Empty matrix\n"));
237   }
238   #endif
239 
240   PetscCall(ISRestoreIndices(isrow, &r));
241   PetscCall(ISRestoreIndices(isicol, &ic));
242 
243   /* destroy list of free space and other temporary array(s) */
244   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
245   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
246   PetscCall(PetscLLDestroy(lnk, lnkbt));
247   PetscCall(PetscFree2(bi_ptr, im));
248 
249   /* put together the new matrix */
250   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
251   b = (Mat_SeqAIJ *)(B)->data;
252 
253   b->free_a       = PETSC_TRUE;
254   b->free_ij      = PETSC_TRUE;
255   b->singlemalloc = PETSC_FALSE;
256 
257   PetscCall(PetscMalloc1(bi[n] + 1, &b->a));
258   b->j    = bj;
259   b->i    = bi;
260   b->diag = bdiag;
261   b->ilen = NULL;
262   b->imax = NULL;
263   b->row  = isrow;
264   b->col  = iscol;
265   PetscCall(PetscObjectReference((PetscObject)isrow));
266   PetscCall(PetscObjectReference((PetscObject)iscol));
267   b->icol = isicol;
268   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
269 
270   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
271   b->maxnz = b->nz = bi[n];
272 
273   (B)->factortype            = MAT_FACTOR_LU;
274   (B)->info.factor_mallocs   = reallocs;
275   (B)->info.fill_ratio_given = f;
276 
277   if (ai[n]) {
278     (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
279   } else {
280     (B)->info.fill_ratio_needed = 0.0;
281   }
282   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
283   if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
284   PetscFunctionReturn(PETSC_SUCCESS);
285 }
286 #endif
287 
288 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
289 {
290   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
291   IS                 isicol;
292   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
293   PetscInt           i, n = A->rmap->n;
294   PetscInt          *bi, *bj;
295   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
296   PetscReal          f;
297   PetscInt           nlnk, *lnk, k, **bi_ptr;
298   PetscFreeSpaceList free_space = NULL, current_space = NULL;
299   PetscBT            lnkbt;
300   PetscBool          missing;
301 
302   PetscFunctionBegin;
303   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
304   PetscCall(MatMissingDiagonal(A, &missing, &i));
305   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
306 
307   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
308   PetscCall(ISGetIndices(isrow, &r));
309   PetscCall(ISGetIndices(isicol, &ic));
310 
311   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
312   PetscCall(PetscMalloc1(n + 1, &bi));
313   PetscCall(PetscMalloc1(n + 1, &bdiag));
314   bi[0] = bdiag[0] = 0;
315 
316   /* linked list for storing column indices of the active row */
317   nlnk = n + 1;
318   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
319 
320   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
321 
322   /* initial FreeSpace size is f*(ai[n]+1) */
323   f = info->fill;
324   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
325   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
326   current_space = free_space;
327 
328   for (i = 0; i < n; i++) {
329     /* copy previous fill into linked list */
330     nzi   = 0;
331     nnz   = ai[r[i] + 1] - ai[r[i]];
332     ajtmp = aj + ai[r[i]];
333     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
334     nzi += nlnk;
335 
336     /* add pivot rows into linked list */
337     row = lnk[n];
338     while (row < i) {
339       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
340       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
341       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
342       nzi += nlnk;
343       row = lnk[row];
344     }
345     bi[i + 1] = bi[i] + nzi;
346     im[i]     = nzi;
347 
348     /* mark bdiag */
349     nzbd = 0;
350     nnz  = nzi;
351     k    = lnk[n];
352     while (nnz-- && k < i) {
353       nzbd++;
354       k = lnk[k];
355     }
356     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
357 
358     /* if free space is not available, make more free space */
359     if (current_space->local_remaining < nzi) {
360       /* estimated additional space needed */
361       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
362       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
363       reallocs++;
364     }
365 
366     /* copy data into free space, then initialize lnk */
367     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
368 
369     bi_ptr[i] = current_space->array;
370     current_space->array += nzi;
371     current_space->local_used += nzi;
372     current_space->local_remaining -= nzi;
373   }
374 
375   PetscCall(ISRestoreIndices(isrow, &r));
376   PetscCall(ISRestoreIndices(isicol, &ic));
377 
378   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
379   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
380   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
381   PetscCall(PetscLLDestroy(lnk, lnkbt));
382   PetscCall(PetscFree2(bi_ptr, im));
383 
384   /* put together the new matrix */
385   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
386   b = (Mat_SeqAIJ *)(B)->data;
387 
388   b->free_a       = PETSC_TRUE;
389   b->free_ij      = PETSC_TRUE;
390   b->singlemalloc = PETSC_FALSE;
391 
392   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
393 
394   b->j    = bj;
395   b->i    = bi;
396   b->diag = bdiag;
397   b->ilen = NULL;
398   b->imax = NULL;
399   b->row  = isrow;
400   b->col  = iscol;
401   PetscCall(PetscObjectReference((PetscObject)isrow));
402   PetscCall(PetscObjectReference((PetscObject)iscol));
403   b->icol = isicol;
404   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
405 
406   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
407   b->maxnz = b->nz = bdiag[0] + 1;
408 
409   B->factortype            = MAT_FACTOR_LU;
410   B->info.factor_mallocs   = reallocs;
411   B->info.fill_ratio_given = f;
412 
413   if (ai[n]) {
414     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
415   } else {
416     B->info.fill_ratio_needed = 0.0;
417   }
418 #if defined(PETSC_USE_INFO)
419   if (ai[n] != 0) {
420     PetscReal af = B->info.fill_ratio_needed;
421     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
422     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
423     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
424     PetscCall(PetscInfo(A, "for best performance.\n"));
425   } else {
426     PetscCall(PetscInfo(A, "Empty matrix\n"));
427   }
428 #endif
429   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
430   if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
431   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
432   PetscFunctionReturn(PETSC_SUCCESS);
433 }
434 
435 /*
436     Trouble in factorization, should we dump the original matrix?
437 */
438 PetscErrorCode MatFactorDumpMatrix(Mat A)
439 {
440   PetscBool flg = PETSC_FALSE;
441 
442   PetscFunctionBegin;
443   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
444   if (flg) {
445     PetscViewer viewer;
446     char        filename[PETSC_MAX_PATH_LEN];
447 
448     PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
449     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
450     PetscCall(MatView(A, viewer));
451     PetscCall(PetscViewerDestroy(&viewer));
452   }
453   PetscFunctionReturn(PETSC_SUCCESS);
454 }
455 
456 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
457 {
458   Mat              C = B;
459   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
460   IS               isrow = b->row, isicol = b->icol;
461   const PetscInt  *r, *ic, *ics;
462   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
463   PetscInt         i, j, k, nz, nzL, row, *pj;
464   const PetscInt  *ajtmp, *bjtmp;
465   MatScalar       *rtmp, *pc, multiplier, *pv;
466   const MatScalar *aa = a->a, *v;
467   PetscBool        row_identity, col_identity;
468   FactorShiftCtx   sctx;
469   const PetscInt  *ddiag;
470   PetscReal        rs;
471   MatScalar        d;
472 
473   PetscFunctionBegin;
474   /* MatPivotSetUp(): initialize shift context sctx */
475   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
476 
477   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
478     ddiag          = a->diag;
479     sctx.shift_top = info->zeropivot;
480     for (i = 0; i < n; i++) {
481       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
482       d  = (aa)[ddiag[i]];
483       rs = -PetscAbsScalar(d) - PetscRealPart(d);
484       v  = aa + ai[i];
485       nz = ai[i + 1] - ai[i];
486       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
487       if (rs > sctx.shift_top) sctx.shift_top = rs;
488     }
489     sctx.shift_top *= 1.1;
490     sctx.nshift_max = 5;
491     sctx.shift_lo   = 0.;
492     sctx.shift_hi   = 1.;
493   }
494 
495   PetscCall(ISGetIndices(isrow, &r));
496   PetscCall(ISGetIndices(isicol, &ic));
497   PetscCall(PetscMalloc1(n + 1, &rtmp));
498   ics = ic;
499 
500   do {
501     sctx.newshift = PETSC_FALSE;
502     for (i = 0; i < n; i++) {
503       /* zero rtmp */
504       /* L part */
505       nz    = bi[i + 1] - bi[i];
506       bjtmp = bj + bi[i];
507       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
508 
509       /* U part */
510       nz    = bdiag[i] - bdiag[i + 1];
511       bjtmp = bj + bdiag[i + 1] + 1;
512       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
513 
514       /* load in initial (unfactored row) */
515       nz    = ai[r[i] + 1] - ai[r[i]];
516       ajtmp = aj + ai[r[i]];
517       v     = aa + ai[r[i]];
518       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
519       /* ZeropivotApply() */
520       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
521 
522       /* elimination */
523       bjtmp = bj + bi[i];
524       row   = *bjtmp++;
525       nzL   = bi[i + 1] - bi[i];
526       for (k = 0; k < nzL; k++) {
527         pc = rtmp + row;
528         if (*pc != 0.0) {
529           pv         = b->a + bdiag[row];
530           multiplier = *pc * (*pv);
531           *pc        = multiplier;
532 
533           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
534           pv = b->a + bdiag[row + 1] + 1;
535           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
536 
537           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
538           PetscCall(PetscLogFlops(1 + 2.0 * nz));
539         }
540         row = *bjtmp++;
541       }
542 
543       /* finished row so stick it into b->a */
544       rs = 0.0;
545       /* L part */
546       pv = b->a + bi[i];
547       pj = b->j + bi[i];
548       nz = bi[i + 1] - bi[i];
549       for (j = 0; j < nz; j++) {
550         pv[j] = rtmp[pj[j]];
551         rs += PetscAbsScalar(pv[j]);
552       }
553 
554       /* U part */
555       pv = b->a + bdiag[i + 1] + 1;
556       pj = b->j + bdiag[i + 1] + 1;
557       nz = bdiag[i] - bdiag[i + 1] - 1;
558       for (j = 0; j < nz; j++) {
559         pv[j] = rtmp[pj[j]];
560         rs += PetscAbsScalar(pv[j]);
561       }
562 
563       sctx.rs = rs;
564       sctx.pv = rtmp[i];
565       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
566       if (sctx.newshift) break; /* break for-loop */
567       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
568 
569       /* Mark diagonal and invert diagonal for simpler triangular solves */
570       pv  = b->a + bdiag[i];
571       *pv = 1.0 / rtmp[i];
572 
573     } /* endof for (i=0; i<n; i++) { */
574 
575     /* MatPivotRefine() */
576     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
577       /*
578        * if no shift in this attempt & shifting & started shifting & can refine,
579        * then try lower shift
580        */
581       sctx.shift_hi       = sctx.shift_fraction;
582       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
583       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
584       sctx.newshift       = PETSC_TRUE;
585       sctx.nshift++;
586     }
587   } while (sctx.newshift);
588 
589   PetscCall(PetscFree(rtmp));
590   PetscCall(ISRestoreIndices(isicol, &ic));
591   PetscCall(ISRestoreIndices(isrow, &r));
592 
593   PetscCall(ISIdentity(isrow, &row_identity));
594   PetscCall(ISIdentity(isicol, &col_identity));
595   if (b->inode.size) {
596     C->ops->solve = MatSolve_SeqAIJ_Inode;
597   } else if (row_identity && col_identity) {
598     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
599   } else {
600     C->ops->solve = MatSolve_SeqAIJ;
601   }
602   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
603   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
604   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
605   C->ops->matsolve          = MatMatSolve_SeqAIJ;
606   C->assembled              = PETSC_TRUE;
607   C->preallocated           = PETSC_TRUE;
608 
609   PetscCall(PetscLogFlops(C->cmap->n));
610 
611   /* MatShiftView(A,info,&sctx) */
612   if (sctx.nshift) {
613     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
614       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
615     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
616       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
617     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
618       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
619     }
620   }
621   PetscFunctionReturn(PETSC_SUCCESS);
622 }
623 
624 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
625 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
626 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);
627 
628 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
629 {
630   Mat              C = B;
631   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
632   IS               isrow = b->row, isicol = b->icol;
633   const PetscInt  *r, *ic, *ics;
634   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
635   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
636   const PetscInt  *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
637   MatScalar       *pv, *rtmp, *pc, multiplier, d;
638   const MatScalar *v, *aa = a->a;
639   PetscReal        rs = 0.0;
640   FactorShiftCtx   sctx;
641   const PetscInt  *ddiag;
642   PetscBool        row_identity, col_identity;
643 
644   PetscFunctionBegin;
645   /* MatPivotSetUp(): initialize shift context sctx */
646   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
647 
648   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
649     ddiag          = a->diag;
650     sctx.shift_top = info->zeropivot;
651     for (i = 0; i < n; i++) {
652       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
653       d  = (aa)[ddiag[i]];
654       rs = -PetscAbsScalar(d) - PetscRealPart(d);
655       v  = aa + ai[i];
656       nz = ai[i + 1] - ai[i];
657       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
658       if (rs > sctx.shift_top) sctx.shift_top = rs;
659     }
660     sctx.shift_top *= 1.1;
661     sctx.nshift_max = 5;
662     sctx.shift_lo   = 0.;
663     sctx.shift_hi   = 1.;
664   }
665 
666   PetscCall(ISGetIndices(isrow, &r));
667   PetscCall(ISGetIndices(isicol, &ic));
668   PetscCall(PetscMalloc1(n + 1, &rtmp));
669   ics = ic;
670 
671   do {
672     sctx.newshift = PETSC_FALSE;
673     for (i = 0; i < n; i++) {
674       nz    = bi[i + 1] - bi[i];
675       bjtmp = bj + bi[i];
676       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
677 
678       /* load in initial (unfactored row) */
679       nz    = ai[r[i] + 1] - ai[r[i]];
680       ajtmp = aj + ai[r[i]];
681       v     = aa + ai[r[i]];
682       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
683       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
684 
685       row = *bjtmp++;
686       while (row < i) {
687         pc = rtmp + row;
688         if (*pc != 0.0) {
689           pv         = b->a + diag_offset[row];
690           pj         = b->j + diag_offset[row] + 1;
691           multiplier = *pc / *pv++;
692           *pc        = multiplier;
693           nz         = bi[row + 1] - diag_offset[row] - 1;
694           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
695           PetscCall(PetscLogFlops(1 + 2.0 * nz));
696         }
697         row = *bjtmp++;
698       }
699       /* finished row so stick it into b->a */
700       pv   = b->a + bi[i];
701       pj   = b->j + bi[i];
702       nz   = bi[i + 1] - bi[i];
703       diag = diag_offset[i] - bi[i];
704       rs   = 0.0;
705       for (j = 0; j < nz; j++) {
706         pv[j] = rtmp[pj[j]];
707         rs += PetscAbsScalar(pv[j]);
708       }
709       rs -= PetscAbsScalar(pv[diag]);
710 
711       sctx.rs = rs;
712       sctx.pv = pv[diag];
713       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
714       if (sctx.newshift) break;
715       pv[diag] = sctx.pv;
716     }
717 
718     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
719       /*
720        * if no shift in this attempt & shifting & started shifting & can refine,
721        * then try lower shift
722        */
723       sctx.shift_hi       = sctx.shift_fraction;
724       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
725       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
726       sctx.newshift       = PETSC_TRUE;
727       sctx.nshift++;
728     }
729   } while (sctx.newshift);
730 
731   /* invert diagonal entries for simpler triangular solves */
732   for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]];
733   PetscCall(PetscFree(rtmp));
734   PetscCall(ISRestoreIndices(isicol, &ic));
735   PetscCall(ISRestoreIndices(isrow, &r));
736 
737   PetscCall(ISIdentity(isrow, &row_identity));
738   PetscCall(ISIdentity(isicol, &col_identity));
739   if (row_identity && col_identity) {
740     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
741   } else {
742     C->ops->solve = MatSolve_SeqAIJ_inplace;
743   }
744   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
745   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
746   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
747   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
748 
749   C->assembled    = PETSC_TRUE;
750   C->preallocated = PETSC_TRUE;
751 
752   PetscCall(PetscLogFlops(C->cmap->n));
753   if (sctx.nshift) {
754     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
755       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
756     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
757       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
758     }
759   }
760   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
761   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
762 
763   PetscCall(MatSeqAIJCheckInode(C));
764   PetscFunctionReturn(PETSC_SUCCESS);
765 }
766 
767 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);
768 
769 /*
770    This routine implements inplace ILU(0) with row or/and column permutations.
771    Input:
772      A - original matrix
773    Output;
774      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
775          a->j (col index) is permuted by the inverse of colperm, then sorted
776          a->a reordered accordingly with a->j
777          a->diag (ptr to diagonal elements) is updated.
778 */
779 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
780 {
781   Mat_SeqAIJ      *a     = (Mat_SeqAIJ *)A->data;
782   IS               isrow = a->row, isicol = a->icol;
783   const PetscInt  *r, *ic, *ics;
784   PetscInt         i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
785   PetscInt        *ajtmp, nz, row;
786   PetscInt        *diag = a->diag, nbdiag, *pj;
787   PetscScalar     *rtmp, *pc, multiplier, d;
788   MatScalar       *pv, *v;
789   PetscReal        rs;
790   FactorShiftCtx   sctx;
791   const MatScalar *aa = a->a, *vtmp;
792 
793   PetscFunctionBegin;
794   PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
795 
796   /* MatPivotSetUp(): initialize shift context sctx */
797   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
798 
799   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
800     const PetscInt *ddiag = a->diag;
801     sctx.shift_top        = info->zeropivot;
802     for (i = 0; i < n; i++) {
803       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
804       d    = (aa)[ddiag[i]];
805       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
806       vtmp = aa + ai[i];
807       nz   = ai[i + 1] - ai[i];
808       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
809       if (rs > sctx.shift_top) sctx.shift_top = rs;
810     }
811     sctx.shift_top *= 1.1;
812     sctx.nshift_max = 5;
813     sctx.shift_lo   = 0.;
814     sctx.shift_hi   = 1.;
815   }
816 
817   PetscCall(ISGetIndices(isrow, &r));
818   PetscCall(ISGetIndices(isicol, &ic));
819   PetscCall(PetscMalloc1(n + 1, &rtmp));
820   PetscCall(PetscArrayzero(rtmp, n + 1));
821   ics = ic;
822 
823 #if defined(MV)
824   sctx.shift_top      = 0.;
825   sctx.nshift_max     = 0;
826   sctx.shift_lo       = 0.;
827   sctx.shift_hi       = 0.;
828   sctx.shift_fraction = 0.;
829 
830   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
831     sctx.shift_top = 0.;
832     for (i = 0; i < n; i++) {
833       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
834       d  = (a->a)[diag[i]];
835       rs = -PetscAbsScalar(d) - PetscRealPart(d);
836       v  = a->a + ai[i];
837       nz = ai[i + 1] - ai[i];
838       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
839       if (rs > sctx.shift_top) sctx.shift_top = rs;
840     }
841     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
842     sctx.shift_top *= 1.1;
843     sctx.nshift_max = 5;
844     sctx.shift_lo   = 0.;
845     sctx.shift_hi   = 1.;
846   }
847 
848   sctx.shift_amount = 0.;
849   sctx.nshift       = 0;
850 #endif
851 
852   do {
853     sctx.newshift = PETSC_FALSE;
854     for (i = 0; i < n; i++) {
855       /* load in initial unfactored row */
856       nz    = ai[r[i] + 1] - ai[r[i]];
857       ajtmp = aj + ai[r[i]];
858       v     = a->a + ai[r[i]];
859       /* sort permuted ajtmp and values v accordingly */
860       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
861       PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
862 
863       diag[r[i]] = ai[r[i]];
864       for (j = 0; j < nz; j++) {
865         rtmp[ajtmp[j]] = v[j];
866         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
867       }
868       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
869 
870       row = *ajtmp++;
871       while (row < i) {
872         pc = rtmp + row;
873         if (*pc != 0.0) {
874           pv = a->a + diag[r[row]];
875           pj = aj + diag[r[row]] + 1;
876 
877           multiplier = *pc / *pv++;
878           *pc        = multiplier;
879           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
880           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
881           PetscCall(PetscLogFlops(1 + 2.0 * nz));
882         }
883         row = *ajtmp++;
884       }
885       /* finished row so overwrite it onto a->a */
886       pv     = a->a + ai[r[i]];
887       pj     = aj + ai[r[i]];
888       nz     = ai[r[i] + 1] - ai[r[i]];
889       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
890 
891       rs = 0.0;
892       for (j = 0; j < nz; j++) {
893         pv[j] = rtmp[pj[j]];
894         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
895       }
896 
897       sctx.rs = rs;
898       sctx.pv = pv[nbdiag];
899       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
900       if (sctx.newshift) break;
901       pv[nbdiag] = sctx.pv;
902     }
903 
904     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
905       /*
906        * if no shift in this attempt & shifting & started shifting & can refine,
907        * then try lower shift
908        */
909       sctx.shift_hi       = sctx.shift_fraction;
910       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
911       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
912       sctx.newshift       = PETSC_TRUE;
913       sctx.nshift++;
914     }
915   } while (sctx.newshift);
916 
917   /* invert diagonal entries for simpler triangular solves */
918   for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]];
919 
920   PetscCall(PetscFree(rtmp));
921   PetscCall(ISRestoreIndices(isicol, &ic));
922   PetscCall(ISRestoreIndices(isrow, &r));
923 
924   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
925   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
926   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
927   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
928 
929   A->assembled    = PETSC_TRUE;
930   A->preallocated = PETSC_TRUE;
931 
932   PetscCall(PetscLogFlops(A->cmap->n));
933   if (sctx.nshift) {
934     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
935       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
936     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
937       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
938     }
939   }
940   PetscFunctionReturn(PETSC_SUCCESS);
941 }
942 
943 /* ----------------------------------------------------------- */
944 PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
945 {
946   Mat C;
947 
948   PetscFunctionBegin;
949   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
950   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
951   PetscCall(MatLUFactorNumeric(C, A, info));
952 
953   A->ops->solve          = C->ops->solve;
954   A->ops->solvetranspose = C->ops->solvetranspose;
955 
956   PetscCall(MatHeaderMerge(A, &C));
957   PetscFunctionReturn(PETSC_SUCCESS);
958 }
959 /* ----------------------------------------------------------- */
960 
961 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
962 {
963   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
964   IS                 iscol = a->col, isrow = a->row;
965   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
966   PetscInt           nz;
967   const PetscInt    *rout, *cout, *r, *c;
968   PetscScalar       *x, *tmp, *tmps, sum;
969   const PetscScalar *b;
970   const MatScalar   *aa = a->a, *v;
971 
972   PetscFunctionBegin;
973   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
974 
975   PetscCall(VecGetArrayRead(bb, &b));
976   PetscCall(VecGetArrayWrite(xx, &x));
977   tmp = a->solve_work;
978 
979   PetscCall(ISGetIndices(isrow, &rout));
980   r = rout;
981   PetscCall(ISGetIndices(iscol, &cout));
982   c = cout + (n - 1);
983 
984   /* forward solve the lower triangular */
985   tmp[0] = b[*r++];
986   tmps   = tmp;
987   for (i = 1; i < n; i++) {
988     v   = aa + ai[i];
989     vi  = aj + ai[i];
990     nz  = a->diag[i] - ai[i];
991     sum = b[*r++];
992     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
993     tmp[i] = sum;
994   }
995 
996   /* backward solve the upper triangular */
997   for (i = n - 1; i >= 0; i--) {
998     v   = aa + a->diag[i] + 1;
999     vi  = aj + a->diag[i] + 1;
1000     nz  = ai[i + 1] - a->diag[i] - 1;
1001     sum = tmp[i];
1002     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1003     x[*c--] = tmp[i] = sum * aa[a->diag[i]];
1004   }
1005 
1006   PetscCall(ISRestoreIndices(isrow, &rout));
1007   PetscCall(ISRestoreIndices(iscol, &cout));
1008   PetscCall(VecRestoreArrayRead(bb, &b));
1009   PetscCall(VecRestoreArrayWrite(xx, &x));
1010   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1011   PetscFunctionReturn(PETSC_SUCCESS);
1012 }
1013 
1014 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
1015 {
1016   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1017   IS                 iscol = a->col, isrow = a->row;
1018   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1019   PetscInt           nz, neq, ldb, ldx;
1020   const PetscInt    *rout, *cout, *r, *c;
1021   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
1022   const PetscScalar *b, *aa  = a->a, *v;
1023   PetscBool          isdense;
1024 
1025   PetscFunctionBegin;
1026   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1027   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1028   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1029   if (X != B) {
1030     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1031     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1032   }
1033   PetscCall(MatDenseGetArrayRead(B, &b));
1034   PetscCall(MatDenseGetLDA(B, &ldb));
1035   PetscCall(MatDenseGetArray(X, &x));
1036   PetscCall(MatDenseGetLDA(X, &ldx));
1037   PetscCall(ISGetIndices(isrow, &rout));
1038   r = rout;
1039   PetscCall(ISGetIndices(iscol, &cout));
1040   c = cout;
1041   for (neq = 0; neq < B->cmap->n; neq++) {
1042     /* forward solve the lower triangular */
1043     tmp[0] = b[r[0]];
1044     tmps   = tmp;
1045     for (i = 1; i < n; i++) {
1046       v   = aa + ai[i];
1047       vi  = aj + ai[i];
1048       nz  = a->diag[i] - ai[i];
1049       sum = b[r[i]];
1050       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1051       tmp[i] = sum;
1052     }
1053     /* backward solve the upper triangular */
1054     for (i = n - 1; i >= 0; i--) {
1055       v   = aa + a->diag[i] + 1;
1056       vi  = aj + a->diag[i] + 1;
1057       nz  = ai[i + 1] - a->diag[i] - 1;
1058       sum = tmp[i];
1059       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1060       x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
1061     }
1062     b += ldb;
1063     x += ldx;
1064   }
1065   PetscCall(ISRestoreIndices(isrow, &rout));
1066   PetscCall(ISRestoreIndices(iscol, &cout));
1067   PetscCall(MatDenseRestoreArrayRead(B, &b));
1068   PetscCall(MatDenseRestoreArray(X, &x));
1069   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1070   PetscFunctionReturn(PETSC_SUCCESS);
1071 }
1072 
1073 PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
1074 {
1075   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1076   IS                 iscol = a->col, isrow = a->row;
1077   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1078   PetscInt           nz, neq, ldb, ldx;
1079   const PetscInt    *rout, *cout, *r, *c;
1080   PetscScalar       *x, *tmp = a->solve_work, sum;
1081   const PetscScalar *b, *aa  = a->a, *v;
1082   PetscBool          isdense;
1083 
1084   PetscFunctionBegin;
1085   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1086   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1087   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1088   if (X != B) {
1089     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1090     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1091   }
1092   PetscCall(MatDenseGetArrayRead(B, &b));
1093   PetscCall(MatDenseGetLDA(B, &ldb));
1094   PetscCall(MatDenseGetArray(X, &x));
1095   PetscCall(MatDenseGetLDA(X, &ldx));
1096   PetscCall(ISGetIndices(isrow, &rout));
1097   r = rout;
1098   PetscCall(ISGetIndices(iscol, &cout));
1099   c = cout;
1100   for (neq = 0; neq < B->cmap->n; neq++) {
1101     /* forward solve the lower triangular */
1102     tmp[0] = b[r[0]];
1103     v      = aa;
1104     vi     = aj;
1105     for (i = 1; i < n; i++) {
1106       nz  = ai[i + 1] - ai[i];
1107       sum = b[r[i]];
1108       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1109       tmp[i] = sum;
1110       v += nz;
1111       vi += nz;
1112     }
1113     /* backward solve the upper triangular */
1114     for (i = n - 1; i >= 0; i--) {
1115       v   = aa + adiag[i + 1] + 1;
1116       vi  = aj + adiag[i + 1] + 1;
1117       nz  = adiag[i] - adiag[i + 1] - 1;
1118       sum = tmp[i];
1119       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1120       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
1121     }
1122     b += ldb;
1123     x += ldx;
1124   }
1125   PetscCall(ISRestoreIndices(isrow, &rout));
1126   PetscCall(ISRestoreIndices(iscol, &cout));
1127   PetscCall(MatDenseRestoreArrayRead(B, &b));
1128   PetscCall(MatDenseRestoreArray(X, &x));
1129   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1130   PetscFunctionReturn(PETSC_SUCCESS);
1131 }
1132 
1133 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
1134 {
1135   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1136   IS                 iscol = a->col, isrow = a->row;
1137   const PetscInt    *r, *c, *rout, *cout;
1138   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1139   PetscInt           nz, row;
1140   PetscScalar       *x, *tmp, *tmps, sum;
1141   const PetscScalar *b;
1142   const MatScalar   *aa = a->a, *v;
1143 
1144   PetscFunctionBegin;
1145   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1146 
1147   PetscCall(VecGetArrayRead(bb, &b));
1148   PetscCall(VecGetArrayWrite(xx, &x));
1149   tmp = a->solve_work;
1150 
1151   PetscCall(ISGetIndices(isrow, &rout));
1152   r = rout;
1153   PetscCall(ISGetIndices(iscol, &cout));
1154   c = cout + (n - 1);
1155 
1156   /* forward solve the lower triangular */
1157   tmp[0] = b[*r++];
1158   tmps   = tmp;
1159   for (row = 1; row < n; row++) {
1160     i   = rout[row]; /* permuted row */
1161     v   = aa + ai[i];
1162     vi  = aj + ai[i];
1163     nz  = a->diag[i] - ai[i];
1164     sum = b[*r++];
1165     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1166     tmp[row] = sum;
1167   }
1168 
1169   /* backward solve the upper triangular */
1170   for (row = n - 1; row >= 0; row--) {
1171     i   = rout[row]; /* permuted row */
1172     v   = aa + a->diag[i] + 1;
1173     vi  = aj + a->diag[i] + 1;
1174     nz  = ai[i + 1] - a->diag[i] - 1;
1175     sum = tmp[row];
1176     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1177     x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1178   }
1179 
1180   PetscCall(ISRestoreIndices(isrow, &rout));
1181   PetscCall(ISRestoreIndices(iscol, &cout));
1182   PetscCall(VecRestoreArrayRead(bb, &b));
1183   PetscCall(VecRestoreArrayWrite(xx, &x));
1184   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1185   PetscFunctionReturn(PETSC_SUCCESS);
1186 }
1187 
1188 /* ----------------------------------------------------------- */
1189 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1190 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1191 {
1192   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1193   PetscInt           n  = A->rmap->n;
1194   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag;
1195   PetscScalar       *x;
1196   const PetscScalar *b;
1197   const MatScalar   *aa = a->a;
1198 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1199   PetscInt         adiag_i, i, nz, ai_i;
1200   const PetscInt  *vi;
1201   const MatScalar *v;
1202   PetscScalar      sum;
1203 #endif
1204 
1205   PetscFunctionBegin;
1206   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1207 
1208   PetscCall(VecGetArrayRead(bb, &b));
1209   PetscCall(VecGetArrayWrite(xx, &x));
1210 
1211 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1212   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1213 #else
1214   /* forward solve the lower triangular */
1215   x[0] = b[0];
1216   for (i = 1; i < n; i++) {
1217     ai_i = ai[i];
1218     v    = aa + ai_i;
1219     vi   = aj + ai_i;
1220     nz   = adiag[i] - ai_i;
1221     sum  = b[i];
1222     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1223     x[i] = sum;
1224   }
1225 
1226   /* backward solve the upper triangular */
1227   for (i = n - 1; i >= 0; i--) {
1228     adiag_i = adiag[i];
1229     v       = aa + adiag_i + 1;
1230     vi      = aj + adiag_i + 1;
1231     nz      = ai[i + 1] - adiag_i - 1;
1232     sum     = x[i];
1233     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1234     x[i] = sum * aa[adiag_i];
1235   }
1236 #endif
1237   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1238   PetscCall(VecRestoreArrayRead(bb, &b));
1239   PetscCall(VecRestoreArrayWrite(xx, &x));
1240   PetscFunctionReturn(PETSC_SUCCESS);
1241 }
1242 
1243 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1244 {
1245   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1246   IS                 iscol = a->col, isrow = a->row;
1247   PetscInt           i, n                  = A->rmap->n, j;
1248   PetscInt           nz;
1249   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
1250   PetscScalar       *x, *tmp, sum;
1251   const PetscScalar *b;
1252   const MatScalar   *aa = a->a, *v;
1253 
1254   PetscFunctionBegin;
1255   if (yy != xx) PetscCall(VecCopy(yy, xx));
1256 
1257   PetscCall(VecGetArrayRead(bb, &b));
1258   PetscCall(VecGetArray(xx, &x));
1259   tmp = a->solve_work;
1260 
1261   PetscCall(ISGetIndices(isrow, &rout));
1262   r = rout;
1263   PetscCall(ISGetIndices(iscol, &cout));
1264   c = cout + (n - 1);
1265 
1266   /* forward solve the lower triangular */
1267   tmp[0] = b[*r++];
1268   for (i = 1; i < n; i++) {
1269     v   = aa + ai[i];
1270     vi  = aj + ai[i];
1271     nz  = a->diag[i] - ai[i];
1272     sum = b[*r++];
1273     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1274     tmp[i] = sum;
1275   }
1276 
1277   /* backward solve the upper triangular */
1278   for (i = n - 1; i >= 0; i--) {
1279     v   = aa + a->diag[i] + 1;
1280     vi  = aj + a->diag[i] + 1;
1281     nz  = ai[i + 1] - a->diag[i] - 1;
1282     sum = tmp[i];
1283     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1284     tmp[i] = sum * aa[a->diag[i]];
1285     x[*c--] += tmp[i];
1286   }
1287 
1288   PetscCall(ISRestoreIndices(isrow, &rout));
1289   PetscCall(ISRestoreIndices(iscol, &cout));
1290   PetscCall(VecRestoreArrayRead(bb, &b));
1291   PetscCall(VecRestoreArray(xx, &x));
1292   PetscCall(PetscLogFlops(2.0 * a->nz));
1293   PetscFunctionReturn(PETSC_SUCCESS);
1294 }
1295 
1296 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1297 {
1298   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1299   IS                 iscol = a->col, isrow = a->row;
1300   PetscInt           i, n                  = A->rmap->n, j;
1301   PetscInt           nz;
1302   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1303   PetscScalar       *x, *tmp, sum;
1304   const PetscScalar *b;
1305   const MatScalar   *aa = a->a, *v;
1306 
1307   PetscFunctionBegin;
1308   if (yy != xx) PetscCall(VecCopy(yy, xx));
1309 
1310   PetscCall(VecGetArrayRead(bb, &b));
1311   PetscCall(VecGetArray(xx, &x));
1312   tmp = a->solve_work;
1313 
1314   PetscCall(ISGetIndices(isrow, &rout));
1315   r = rout;
1316   PetscCall(ISGetIndices(iscol, &cout));
1317   c = cout;
1318 
1319   /* forward solve the lower triangular */
1320   tmp[0] = b[r[0]];
1321   v      = aa;
1322   vi     = aj;
1323   for (i = 1; i < n; i++) {
1324     nz  = ai[i + 1] - ai[i];
1325     sum = b[r[i]];
1326     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1327     tmp[i] = sum;
1328     v += nz;
1329     vi += nz;
1330   }
1331 
1332   /* backward solve the upper triangular */
1333   v  = aa + adiag[n - 1];
1334   vi = aj + adiag[n - 1];
1335   for (i = n - 1; i >= 0; i--) {
1336     nz  = adiag[i] - adiag[i + 1] - 1;
1337     sum = tmp[i];
1338     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1339     tmp[i] = sum * v[nz];
1340     x[c[i]] += tmp[i];
1341     v += nz + 1;
1342     vi += nz + 1;
1343   }
1344 
1345   PetscCall(ISRestoreIndices(isrow, &rout));
1346   PetscCall(ISRestoreIndices(iscol, &cout));
1347   PetscCall(VecRestoreArrayRead(bb, &b));
1348   PetscCall(VecRestoreArray(xx, &x));
1349   PetscCall(PetscLogFlops(2.0 * a->nz));
1350   PetscFunctionReturn(PETSC_SUCCESS);
1351 }
1352 
1353 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1354 {
1355   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1356   IS                 iscol = a->col, isrow = a->row;
1357   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1358   PetscInt           i, n = A->rmap->n, j;
1359   PetscInt           nz;
1360   PetscScalar       *x, *tmp, s1;
1361   const MatScalar   *aa = a->a, *v;
1362   const PetscScalar *b;
1363 
1364   PetscFunctionBegin;
1365   PetscCall(VecGetArrayRead(bb, &b));
1366   PetscCall(VecGetArrayWrite(xx, &x));
1367   tmp = a->solve_work;
1368 
1369   PetscCall(ISGetIndices(isrow, &rout));
1370   r = rout;
1371   PetscCall(ISGetIndices(iscol, &cout));
1372   c = cout;
1373 
1374   /* copy the b into temp work space according to permutation */
1375   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1376 
1377   /* forward solve the U^T */
1378   for (i = 0; i < n; i++) {
1379     v  = aa + diag[i];
1380     vi = aj + diag[i] + 1;
1381     nz = ai[i + 1] - diag[i] - 1;
1382     s1 = tmp[i];
1383     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1384     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1385     tmp[i] = s1;
1386   }
1387 
1388   /* backward solve the L^T */
1389   for (i = n - 1; i >= 0; i--) {
1390     v  = aa + diag[i] - 1;
1391     vi = aj + diag[i] - 1;
1392     nz = diag[i] - ai[i];
1393     s1 = tmp[i];
1394     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1395   }
1396 
1397   /* copy tmp into x according to permutation */
1398   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1399 
1400   PetscCall(ISRestoreIndices(isrow, &rout));
1401   PetscCall(ISRestoreIndices(iscol, &cout));
1402   PetscCall(VecRestoreArrayRead(bb, &b));
1403   PetscCall(VecRestoreArrayWrite(xx, &x));
1404 
1405   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1406   PetscFunctionReturn(PETSC_SUCCESS);
1407 }
1408 
1409 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1410 {
1411   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1412   IS                 iscol = a->col, isrow = a->row;
1413   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1414   PetscInt           i, n = A->rmap->n, j;
1415   PetscInt           nz;
1416   PetscScalar       *x, *tmp, s1;
1417   const MatScalar   *aa = a->a, *v;
1418   const PetscScalar *b;
1419 
1420   PetscFunctionBegin;
1421   PetscCall(VecGetArrayRead(bb, &b));
1422   PetscCall(VecGetArrayWrite(xx, &x));
1423   tmp = a->solve_work;
1424 
1425   PetscCall(ISGetIndices(isrow, &rout));
1426   r = rout;
1427   PetscCall(ISGetIndices(iscol, &cout));
1428   c = cout;
1429 
1430   /* copy the b into temp work space according to permutation */
1431   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1432 
1433   /* forward solve the U^T */
1434   for (i = 0; i < n; i++) {
1435     v  = aa + adiag[i + 1] + 1;
1436     vi = aj + adiag[i + 1] + 1;
1437     nz = adiag[i] - adiag[i + 1] - 1;
1438     s1 = tmp[i];
1439     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1440     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1441     tmp[i] = s1;
1442   }
1443 
1444   /* backward solve the L^T */
1445   for (i = n - 1; i >= 0; i--) {
1446     v  = aa + ai[i];
1447     vi = aj + ai[i];
1448     nz = ai[i + 1] - ai[i];
1449     s1 = tmp[i];
1450     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1451   }
1452 
1453   /* copy tmp into x according to permutation */
1454   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1455 
1456   PetscCall(ISRestoreIndices(isrow, &rout));
1457   PetscCall(ISRestoreIndices(iscol, &cout));
1458   PetscCall(VecRestoreArrayRead(bb, &b));
1459   PetscCall(VecRestoreArrayWrite(xx, &x));
1460 
1461   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1462   PetscFunctionReturn(PETSC_SUCCESS);
1463 }
1464 
1465 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1466 {
1467   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1468   IS                 iscol = a->col, isrow = a->row;
1469   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1470   PetscInt           i, n = A->rmap->n, j;
1471   PetscInt           nz;
1472   PetscScalar       *x, *tmp, s1;
1473   const MatScalar   *aa = a->a, *v;
1474   const PetscScalar *b;
1475 
1476   PetscFunctionBegin;
1477   if (zz != xx) PetscCall(VecCopy(zz, xx));
1478   PetscCall(VecGetArrayRead(bb, &b));
1479   PetscCall(VecGetArray(xx, &x));
1480   tmp = a->solve_work;
1481 
1482   PetscCall(ISGetIndices(isrow, &rout));
1483   r = rout;
1484   PetscCall(ISGetIndices(iscol, &cout));
1485   c = cout;
1486 
1487   /* copy the b into temp work space according to permutation */
1488   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1489 
1490   /* forward solve the U^T */
1491   for (i = 0; i < n; i++) {
1492     v  = aa + diag[i];
1493     vi = aj + diag[i] + 1;
1494     nz = ai[i + 1] - diag[i] - 1;
1495     s1 = tmp[i];
1496     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1497     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1498     tmp[i] = s1;
1499   }
1500 
1501   /* backward solve the L^T */
1502   for (i = n - 1; i >= 0; i--) {
1503     v  = aa + diag[i] - 1;
1504     vi = aj + diag[i] - 1;
1505     nz = diag[i] - ai[i];
1506     s1 = tmp[i];
1507     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1508   }
1509 
1510   /* copy tmp into x according to permutation */
1511   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1512 
1513   PetscCall(ISRestoreIndices(isrow, &rout));
1514   PetscCall(ISRestoreIndices(iscol, &cout));
1515   PetscCall(VecRestoreArrayRead(bb, &b));
1516   PetscCall(VecRestoreArray(xx, &x));
1517 
1518   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1519   PetscFunctionReturn(PETSC_SUCCESS);
1520 }
1521 
1522 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1523 {
1524   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1525   IS                 iscol = a->col, isrow = a->row;
1526   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1527   PetscInt           i, n = A->rmap->n, j;
1528   PetscInt           nz;
1529   PetscScalar       *x, *tmp, s1;
1530   const MatScalar   *aa = a->a, *v;
1531   const PetscScalar *b;
1532 
1533   PetscFunctionBegin;
1534   if (zz != xx) PetscCall(VecCopy(zz, xx));
1535   PetscCall(VecGetArrayRead(bb, &b));
1536   PetscCall(VecGetArray(xx, &x));
1537   tmp = a->solve_work;
1538 
1539   PetscCall(ISGetIndices(isrow, &rout));
1540   r = rout;
1541   PetscCall(ISGetIndices(iscol, &cout));
1542   c = cout;
1543 
1544   /* copy the b into temp work space according to permutation */
1545   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1546 
1547   /* forward solve the U^T */
1548   for (i = 0; i < n; i++) {
1549     v  = aa + adiag[i + 1] + 1;
1550     vi = aj + adiag[i + 1] + 1;
1551     nz = adiag[i] - adiag[i + 1] - 1;
1552     s1 = tmp[i];
1553     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1554     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1555     tmp[i] = s1;
1556   }
1557 
1558   /* backward solve the L^T */
1559   for (i = n - 1; i >= 0; i--) {
1560     v  = aa + ai[i];
1561     vi = aj + ai[i];
1562     nz = ai[i + 1] - ai[i];
1563     s1 = tmp[i];
1564     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1565   }
1566 
1567   /* copy tmp into x according to permutation */
1568   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1569 
1570   PetscCall(ISRestoreIndices(isrow, &rout));
1571   PetscCall(ISRestoreIndices(iscol, &cout));
1572   PetscCall(VecRestoreArrayRead(bb, &b));
1573   PetscCall(VecRestoreArray(xx, &x));
1574 
1575   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1576   PetscFunctionReturn(PETSC_SUCCESS);
1577 }
1578 
1579 /* ----------------------------------------------------------------*/
1580 
1581 /*
1582    ilu() under revised new data structure.
1583    Factored arrays bj and ba are stored as
1584      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1585 
1586    bi=fact->i is an array of size n+1, in which
1587      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1588      bi[n]:  points to L(n-1,n-1)+1
1589 
1590   bdiag=fact->diag is an array of size n+1,in which
1591      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1592      bdiag[n]: points to entry of U(n-1,0)-1
1593 
1594    U(i,:) contains bdiag[i] as its last entry, i.e.,
1595     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1596 */
1597 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1598 {
1599   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1600   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1601   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
1602   IS             isicol;
1603 
1604   PetscFunctionBegin;
1605   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1606   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1607   b = (Mat_SeqAIJ *)(fact)->data;
1608 
1609   /* allocate matrix arrays for new data structure */
1610   PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i));
1611 
1612   b->singlemalloc = PETSC_TRUE;
1613   if (!b->diag) { PetscCall(PetscMalloc1(n + 1, &b->diag)); }
1614   bdiag = b->diag;
1615 
1616   if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1617 
1618   /* set bi and bj with new data structure */
1619   bi = b->i;
1620   bj = b->j;
1621 
1622   /* L part */
1623   bi[0] = 0;
1624   for (i = 0; i < n; i++) {
1625     nz        = adiag[i] - ai[i];
1626     bi[i + 1] = bi[i] + nz;
1627     aj        = a->j + ai[i];
1628     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1629   }
1630 
1631   /* U part */
1632   bdiag[n] = bi[n] - 1;
1633   for (i = n - 1; i >= 0; i--) {
1634     nz = ai[i + 1] - adiag[i] - 1;
1635     aj = a->j + adiag[i] + 1;
1636     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1637     /* diag[i] */
1638     bj[k++]  = i;
1639     bdiag[i] = bdiag[i + 1] + nz + 1;
1640   }
1641 
1642   fact->factortype             = MAT_FACTOR_ILU;
1643   fact->info.factor_mallocs    = 0;
1644   fact->info.fill_ratio_given  = info->fill;
1645   fact->info.fill_ratio_needed = 1.0;
1646   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1647   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1648 
1649   b       = (Mat_SeqAIJ *)(fact)->data;
1650   b->row  = isrow;
1651   b->col  = iscol;
1652   b->icol = isicol;
1653   PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work));
1654   PetscCall(PetscObjectReference((PetscObject)isrow));
1655   PetscCall(PetscObjectReference((PetscObject)iscol));
1656   PetscFunctionReturn(PETSC_SUCCESS);
1657 }
1658 
1659 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1660 {
1661   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1662   IS                 isicol;
1663   const PetscInt    *r, *ic;
1664   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1665   PetscInt          *bi, *cols, nnz, *cols_lvl;
1666   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1667   PetscInt           i, levels, diagonal_fill;
1668   PetscBool          col_identity, row_identity, missing;
1669   PetscReal          f;
1670   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1671   PetscBT            lnkbt;
1672   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1673   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1674   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1675 
1676   PetscFunctionBegin;
1677   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1678   PetscCall(MatMissingDiagonal(A, &missing, &i));
1679   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1680 
1681   levels = (PetscInt)info->levels;
1682   PetscCall(ISIdentity(isrow, &row_identity));
1683   PetscCall(ISIdentity(iscol, &col_identity));
1684   if (!levels && row_identity && col_identity) {
1685     /* special case: ilu(0) with natural ordering */
1686     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1687     if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1688     PetscFunctionReturn(PETSC_SUCCESS);
1689   }
1690 
1691   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1692   PetscCall(ISGetIndices(isrow, &r));
1693   PetscCall(ISGetIndices(isicol, &ic));
1694 
1695   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1696   PetscCall(PetscMalloc1(n + 1, &bi));
1697   PetscCall(PetscMalloc1(n + 1, &bdiag));
1698   bi[0] = bdiag[0] = 0;
1699   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1700 
1701   /* create a linked list for storing column indices of the active row */
1702   nlnk = n + 1;
1703   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1704 
1705   /* initial FreeSpace size is f*(ai[n]+1) */
1706   f             = info->fill;
1707   diagonal_fill = (PetscInt)info->diagonal_fill;
1708   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1709   current_space = free_space;
1710   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1711   current_space_lvl = free_space_lvl;
1712   for (i = 0; i < n; i++) {
1713     nzi = 0;
1714     /* copy current row into linked list */
1715     nnz = ai[r[i] + 1] - ai[r[i]];
1716     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1717     cols   = aj + ai[r[i]];
1718     lnk[i] = -1; /* marker to indicate if diagonal exists */
1719     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1720     nzi += nlnk;
1721 
1722     /* make sure diagonal entry is included */
1723     if (diagonal_fill && lnk[i] == -1) {
1724       fm = n;
1725       while (lnk[fm] < i) fm = lnk[fm];
1726       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1727       lnk[fm]    = i;
1728       lnk_lvl[i] = 0;
1729       nzi++;
1730       dcount++;
1731     }
1732 
1733     /* add pivot rows into the active row */
1734     nzbd = 0;
1735     prow = lnk[n];
1736     while (prow < i) {
1737       nnz      = bdiag[prow];
1738       cols     = bj_ptr[prow] + nnz + 1;
1739       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1740       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1741       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1742       nzi += nlnk;
1743       prow = lnk[prow];
1744       nzbd++;
1745     }
1746     bdiag[i]  = nzbd;
1747     bi[i + 1] = bi[i] + nzi;
1748     /* if free space is not available, make more free space */
1749     if (current_space->local_remaining < nzi) {
1750       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1751       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1752       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1753       reallocs++;
1754     }
1755 
1756     /* copy data into free_space and free_space_lvl, then initialize lnk */
1757     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1758     bj_ptr[i]    = current_space->array;
1759     bjlvl_ptr[i] = current_space_lvl->array;
1760 
1761     /* make sure the active row i has diagonal entry */
1762     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1763 
1764     current_space->array += nzi;
1765     current_space->local_used += nzi;
1766     current_space->local_remaining -= nzi;
1767     current_space_lvl->array += nzi;
1768     current_space_lvl->local_used += nzi;
1769     current_space_lvl->local_remaining -= nzi;
1770   }
1771 
1772   PetscCall(ISRestoreIndices(isrow, &r));
1773   PetscCall(ISRestoreIndices(isicol, &ic));
1774   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1775   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
1776   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1777 
1778   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1779   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1780   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1781 
1782 #if defined(PETSC_USE_INFO)
1783   {
1784     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1785     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1786     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1787     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1788     PetscCall(PetscInfo(A, "for best performance.\n"));
1789     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1790   }
1791 #endif
1792   /* put together the new matrix */
1793   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1794   b = (Mat_SeqAIJ *)(fact)->data;
1795 
1796   b->free_a       = PETSC_TRUE;
1797   b->free_ij      = PETSC_TRUE;
1798   b->singlemalloc = PETSC_FALSE;
1799 
1800   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
1801 
1802   b->j    = bj;
1803   b->i    = bi;
1804   b->diag = bdiag;
1805   b->ilen = NULL;
1806   b->imax = NULL;
1807   b->row  = isrow;
1808   b->col  = iscol;
1809   PetscCall(PetscObjectReference((PetscObject)isrow));
1810   PetscCall(PetscObjectReference((PetscObject)iscol));
1811   b->icol = isicol;
1812 
1813   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1814   /* In b structure:  Free imax, ilen, old a, old j.
1815      Allocate bdiag, solve_work, new a, new j */
1816   b->maxnz = b->nz = bdiag[0] + 1;
1817 
1818   (fact)->info.factor_mallocs    = reallocs;
1819   (fact)->info.fill_ratio_given  = f;
1820   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1821   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1822   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1823   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1824   PetscFunctionReturn(PETSC_SUCCESS);
1825 }
1826 
1827 #if 0
1828 // unused
1829 static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1830 {
1831   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1832   IS                 isicol;
1833   const PetscInt    *r, *ic;
1834   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1835   PetscInt          *bi, *cols, nnz, *cols_lvl;
1836   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1837   PetscInt           i, levels, diagonal_fill;
1838   PetscBool          col_identity, row_identity;
1839   PetscReal          f;
1840   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1841   PetscBT            lnkbt;
1842   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1843   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1844   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1845   PetscBool          missing;
1846 
1847   PetscFunctionBegin;
1848   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1849   PetscCall(MatMissingDiagonal(A, &missing, &i));
1850   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1851 
1852   f             = info->fill;
1853   levels        = (PetscInt)info->levels;
1854   diagonal_fill = (PetscInt)info->diagonal_fill;
1855 
1856   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1857 
1858   PetscCall(ISIdentity(isrow, &row_identity));
1859   PetscCall(ISIdentity(iscol, &col_identity));
1860   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1861     PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE));
1862 
1863     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1864     if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1865     fact->factortype               = MAT_FACTOR_ILU;
1866     (fact)->info.factor_mallocs    = 0;
1867     (fact)->info.fill_ratio_given  = info->fill;
1868     (fact)->info.fill_ratio_needed = 1.0;
1869 
1870     b       = (Mat_SeqAIJ *)(fact)->data;
1871     b->row  = isrow;
1872     b->col  = iscol;
1873     b->icol = isicol;
1874     PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work));
1875     PetscCall(PetscObjectReference((PetscObject)isrow));
1876     PetscCall(PetscObjectReference((PetscObject)iscol));
1877     PetscFunctionReturn(PETSC_SUCCESS);
1878   }
1879 
1880   PetscCall(ISGetIndices(isrow, &r));
1881   PetscCall(ISGetIndices(isicol, &ic));
1882 
1883   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1884   PetscCall(PetscMalloc1(n + 1, &bi));
1885   PetscCall(PetscMalloc1(n + 1, &bdiag));
1886   bi[0] = bdiag[0] = 0;
1887 
1888   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1889 
1890   /* create a linked list for storing column indices of the active row */
1891   nlnk = n + 1;
1892   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1893 
1894   /* initial FreeSpace size is f*(ai[n]+1) */
1895   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1896   current_space = free_space;
1897   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1898   current_space_lvl = free_space_lvl;
1899 
1900   for (i = 0; i < n; i++) {
1901     nzi = 0;
1902     /* copy current row into linked list */
1903     nnz = ai[r[i] + 1] - ai[r[i]];
1904     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1905     cols   = aj + ai[r[i]];
1906     lnk[i] = -1; /* marker to indicate if diagonal exists */
1907     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1908     nzi += nlnk;
1909 
1910     /* make sure diagonal entry is included */
1911     if (diagonal_fill && lnk[i] == -1) {
1912       fm = n;
1913       while (lnk[fm] < i) fm = lnk[fm];
1914       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1915       lnk[fm]    = i;
1916       lnk_lvl[i] = 0;
1917       nzi++;
1918       dcount++;
1919     }
1920 
1921     /* add pivot rows into the active row */
1922     nzbd = 0;
1923     prow = lnk[n];
1924     while (prow < i) {
1925       nnz      = bdiag[prow];
1926       cols     = bj_ptr[prow] + nnz + 1;
1927       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1928       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1929       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1930       nzi += nlnk;
1931       prow = lnk[prow];
1932       nzbd++;
1933     }
1934     bdiag[i]  = nzbd;
1935     bi[i + 1] = bi[i] + nzi;
1936 
1937     /* if free space is not available, make more free space */
1938     if (current_space->local_remaining < nzi) {
1939       nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
1940       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1941       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1942       reallocs++;
1943     }
1944 
1945     /* copy data into free_space and free_space_lvl, then initialize lnk */
1946     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1947     bj_ptr[i]    = current_space->array;
1948     bjlvl_ptr[i] = current_space_lvl->array;
1949 
1950     /* make sure the active row i has diagonal entry */
1951     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1952 
1953     current_space->array += nzi;
1954     current_space->local_used += nzi;
1955     current_space->local_remaining -= nzi;
1956     current_space_lvl->array += nzi;
1957     current_space_lvl->local_used += nzi;
1958     current_space_lvl->local_remaining -= nzi;
1959   }
1960 
1961   PetscCall(ISRestoreIndices(isrow, &r));
1962   PetscCall(ISRestoreIndices(isicol, &ic));
1963 
1964   /* destroy list of free space and other temporary arrays */
1965   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
1966   PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */
1967   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1968   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1969   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1970 
1971   #if defined(PETSC_USE_INFO)
1972   {
1973     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
1974     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1975     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1976     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1977     PetscCall(PetscInfo(A, "for best performance.\n"));
1978     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1979   }
1980   #endif
1981 
1982   /* put together the new matrix */
1983   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1984   b = (Mat_SeqAIJ *)(fact)->data;
1985 
1986   b->free_a       = PETSC_TRUE;
1987   b->free_ij      = PETSC_TRUE;
1988   b->singlemalloc = PETSC_FALSE;
1989 
1990   PetscCall(PetscMalloc1(bi[n], &b->a));
1991   b->j = bj;
1992   b->i = bi;
1993   for (i = 0; i < n; i++) bdiag[i] += bi[i];
1994   b->diag = bdiag;
1995   b->ilen = NULL;
1996   b->imax = NULL;
1997   b->row  = isrow;
1998   b->col  = iscol;
1999   PetscCall(PetscObjectReference((PetscObject)isrow));
2000   PetscCall(PetscObjectReference((PetscObject)iscol));
2001   b->icol = isicol;
2002   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
2003   /* In b structure:  Free imax, ilen, old a, old j.
2004      Allocate bdiag, solve_work, new a, new j */
2005   b->maxnz = b->nz = bi[n];
2006 
2007   (fact)->info.factor_mallocs    = reallocs;
2008   (fact)->info.fill_ratio_given  = f;
2009   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2010   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ_inplace;
2011   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2012   PetscFunctionReturn(PETSC_SUCCESS);
2013 }
2014 #endif
2015 
2016 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
2017 {
2018   Mat             C  = B;
2019   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
2020   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
2021   IS              ip = b->row, iip = b->icol;
2022   const PetscInt *rip, *riip;
2023   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
2024   PetscInt       *ai = a->i, *aj = a->j;
2025   PetscInt        k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
2026   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2027   PetscBool       perm_identity;
2028   FactorShiftCtx  sctx;
2029   PetscReal       rs;
2030   MatScalar       d, *v;
2031 
2032   PetscFunctionBegin;
2033   /* MatPivotSetUp(): initialize shift context sctx */
2034   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2035 
2036   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2037     sctx.shift_top = info->zeropivot;
2038     for (i = 0; i < mbs; i++) {
2039       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2040       d  = (aa)[a->diag[i]];
2041       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2042       v  = aa + ai[i];
2043       nz = ai[i + 1] - ai[i];
2044       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2045       if (rs > sctx.shift_top) sctx.shift_top = rs;
2046     }
2047     sctx.shift_top *= 1.1;
2048     sctx.nshift_max = 5;
2049     sctx.shift_lo   = 0.;
2050     sctx.shift_hi   = 1.;
2051   }
2052 
2053   PetscCall(ISGetIndices(ip, &rip));
2054   PetscCall(ISGetIndices(iip, &riip));
2055 
2056   /* allocate working arrays
2057      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2058      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2059   */
2060   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
2061 
2062   do {
2063     sctx.newshift = PETSC_FALSE;
2064 
2065     for (i = 0; i < mbs; i++) c2r[i] = mbs;
2066     if (mbs) il[0] = 0;
2067 
2068     for (k = 0; k < mbs; k++) {
2069       /* zero rtmp */
2070       nz    = bi[k + 1] - bi[k];
2071       bjtmp = bj + bi[k];
2072       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2073 
2074       /* load in initial unfactored row */
2075       bval = ba + bi[k];
2076       jmin = ai[rip[k]];
2077       jmax = ai[rip[k] + 1];
2078       for (j = jmin; j < jmax; j++) {
2079         col = riip[aj[j]];
2080         if (col >= k) { /* only take upper triangular entry */
2081           rtmp[col] = aa[j];
2082           *bval++   = 0.0; /* for in-place factorization */
2083         }
2084       }
2085       /* shift the diagonal of the matrix: ZeropivotApply() */
2086       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2087 
2088       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2089       dk = rtmp[k];
2090       i  = c2r[k]; /* first row to be added to k_th row  */
2091 
2092       while (i < k) {
2093         nexti = c2r[i]; /* next row to be added to k_th row */
2094 
2095         /* compute multiplier, update diag(k) and U(i,k) */
2096         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
2097         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
2098         dk += uikdi * ba[ili];           /* update diag[k] */
2099         ba[ili] = uikdi;                 /* -U(i,k) */
2100 
2101         /* add multiple of row i to k-th row */
2102         jmin = ili + 1;
2103         jmax = bi[i + 1];
2104         if (jmin < jmax) {
2105           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2106           /* update il and c2r for row i */
2107           il[i]  = jmin;
2108           j      = bj[jmin];
2109           c2r[i] = c2r[j];
2110           c2r[j] = i;
2111         }
2112         i = nexti;
2113       }
2114 
2115       /* copy data into U(k,:) */
2116       rs   = 0.0;
2117       jmin = bi[k];
2118       jmax = bi[k + 1] - 1;
2119       if (jmin < jmax) {
2120         for (j = jmin; j < jmax; j++) {
2121           col   = bj[j];
2122           ba[j] = rtmp[col];
2123           rs += PetscAbsScalar(ba[j]);
2124         }
2125         /* add the k-th row into il and c2r */
2126         il[k]  = jmin;
2127         i      = bj[jmin];
2128         c2r[k] = c2r[i];
2129         c2r[i] = k;
2130       }
2131 
2132       /* MatPivotCheck() */
2133       sctx.rs = rs;
2134       sctx.pv = dk;
2135       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
2136       if (sctx.newshift) break;
2137       dk = sctx.pv;
2138 
2139       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
2140     }
2141   } while (sctx.newshift);
2142 
2143   PetscCall(PetscFree3(rtmp, il, c2r));
2144   PetscCall(ISRestoreIndices(ip, &rip));
2145   PetscCall(ISRestoreIndices(iip, &riip));
2146 
2147   PetscCall(ISIdentity(ip, &perm_identity));
2148   if (perm_identity) {
2149     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2150     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2151     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2152     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2153   } else {
2154     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2155     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2156     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2157     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2158   }
2159 
2160   C->assembled    = PETSC_TRUE;
2161   C->preallocated = PETSC_TRUE;
2162 
2163   PetscCall(PetscLogFlops(C->rmap->n));
2164 
2165   /* MatPivotView() */
2166   if (sctx.nshift) {
2167     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2168       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
2169     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2170       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2171     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2172       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
2173     }
2174   }
2175   PetscFunctionReturn(PETSC_SUCCESS);
2176 }
2177 
2178 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
2179 {
2180   Mat             C  = B;
2181   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
2182   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
2183   IS              ip = b->row, iip = b->icol;
2184   const PetscInt *rip, *riip;
2185   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
2186   PetscInt       *ai = a->i, *aj = a->j;
2187   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2188   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2189   PetscBool       perm_identity;
2190   FactorShiftCtx  sctx;
2191   PetscReal       rs;
2192   MatScalar       d, *v;
2193 
2194   PetscFunctionBegin;
2195   /* MatPivotSetUp(): initialize shift context sctx */
2196   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2197 
2198   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2199     sctx.shift_top = info->zeropivot;
2200     for (i = 0; i < mbs; i++) {
2201       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2202       d  = (aa)[a->diag[i]];
2203       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2204       v  = aa + ai[i];
2205       nz = ai[i + 1] - ai[i];
2206       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2207       if (rs > sctx.shift_top) sctx.shift_top = rs;
2208     }
2209     sctx.shift_top *= 1.1;
2210     sctx.nshift_max = 5;
2211     sctx.shift_lo   = 0.;
2212     sctx.shift_hi   = 1.;
2213   }
2214 
2215   PetscCall(ISGetIndices(ip, &rip));
2216   PetscCall(ISGetIndices(iip, &riip));
2217 
2218   /* initialization */
2219   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
2220 
2221   do {
2222     sctx.newshift = PETSC_FALSE;
2223 
2224     for (i = 0; i < mbs; i++) jl[i] = mbs;
2225     il[0] = 0;
2226 
2227     for (k = 0; k < mbs; k++) {
2228       /* zero rtmp */
2229       nz    = bi[k + 1] - bi[k];
2230       bjtmp = bj + bi[k];
2231       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2232 
2233       bval = ba + bi[k];
2234       /* initialize k-th row by the perm[k]-th row of A */
2235       jmin = ai[rip[k]];
2236       jmax = ai[rip[k] + 1];
2237       for (j = jmin; j < jmax; j++) {
2238         col = riip[aj[j]];
2239         if (col >= k) { /* only take upper triangular entry */
2240           rtmp[col] = aa[j];
2241           *bval++   = 0.0; /* for in-place factorization */
2242         }
2243       }
2244       /* shift the diagonal of the matrix */
2245       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2246 
2247       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2248       dk = rtmp[k];
2249       i  = jl[k]; /* first row to be added to k_th row  */
2250 
2251       while (i < k) {
2252         nexti = jl[i]; /* next row to be added to k_th row */
2253 
2254         /* compute multiplier, update diag(k) and U(i,k) */
2255         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
2256         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
2257         dk += uikdi * ba[ili];
2258         ba[ili] = uikdi; /* -U(i,k) */
2259 
2260         /* add multiple of row i to k-th row */
2261         jmin = ili + 1;
2262         jmax = bi[i + 1];
2263         if (jmin < jmax) {
2264           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2265           /* update il and jl for row i */
2266           il[i] = jmin;
2267           j     = bj[jmin];
2268           jl[i] = jl[j];
2269           jl[j] = i;
2270         }
2271         i = nexti;
2272       }
2273 
2274       /* shift the diagonals when zero pivot is detected */
2275       /* compute rs=sum of abs(off-diagonal) */
2276       rs   = 0.0;
2277       jmin = bi[k] + 1;
2278       nz   = bi[k + 1] - jmin;
2279       bcol = bj + jmin;
2280       for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);
2281 
2282       sctx.rs = rs;
2283       sctx.pv = dk;
2284       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
2285       if (sctx.newshift) break;
2286       dk = sctx.pv;
2287 
2288       /* copy data into U(k,:) */
2289       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
2290       jmin      = bi[k] + 1;
2291       jmax      = bi[k + 1];
2292       if (jmin < jmax) {
2293         for (j = jmin; j < jmax; j++) {
2294           col   = bj[j];
2295           ba[j] = rtmp[col];
2296         }
2297         /* add the k-th row into il and jl */
2298         il[k] = jmin;
2299         i     = bj[jmin];
2300         jl[k] = jl[i];
2301         jl[i] = k;
2302       }
2303     }
2304   } while (sctx.newshift);
2305 
2306   PetscCall(PetscFree3(rtmp, il, jl));
2307   PetscCall(ISRestoreIndices(ip, &rip));
2308   PetscCall(ISRestoreIndices(iip, &riip));
2309 
2310   PetscCall(ISIdentity(ip, &perm_identity));
2311   if (perm_identity) {
2312     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2313     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2314     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2315     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2316   } else {
2317     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2318     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2319     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2320     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2321   }
2322 
2323   C->assembled    = PETSC_TRUE;
2324   C->preallocated = PETSC_TRUE;
2325 
2326   PetscCall(PetscLogFlops(C->rmap->n));
2327   if (sctx.nshift) {
2328     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2329       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2330     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2331       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2332     }
2333   }
2334   PetscFunctionReturn(PETSC_SUCCESS);
2335 }
2336 
2337 /*
2338    icc() under revised new data structure.
2339    Factored arrays bj and ba are stored as
2340      U(0,:),...,U(i,:),U(n-1,:)
2341 
2342    ui=fact->i is an array of size n+1, in which
2343    ui+
2344      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2345      ui[n]:  points to U(n-1,n-1)+1
2346 
2347   udiag=fact->diag is an array of size n,in which
2348      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2349 
2350    U(i,:) contains udiag[i] as its last entry, i.e.,
2351     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2352 */
2353 
2354 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2355 {
2356   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2357   Mat_SeqSBAIJ      *b;
2358   PetscBool          perm_identity, missing;
2359   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2360   const PetscInt    *rip, *riip;
2361   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2362   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2363   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2364   PetscReal          fill = info->fill, levels = info->levels;
2365   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2366   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2367   PetscBT            lnkbt;
2368   IS                 iperm;
2369 
2370   PetscFunctionBegin;
2371   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2372   PetscCall(MatMissingDiagonal(A, &missing, &d));
2373   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2374   PetscCall(ISIdentity(perm, &perm_identity));
2375   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2376 
2377   PetscCall(PetscMalloc1(am + 1, &ui));
2378   PetscCall(PetscMalloc1(am + 1, &udiag));
2379   ui[0] = 0;
2380 
2381   /* ICC(0) without matrix ordering: simply rearrange column indices */
2382   if (!levels && perm_identity) {
2383     for (i = 0; i < am; i++) {
2384       ncols     = ai[i + 1] - a->diag[i];
2385       ui[i + 1] = ui[i] + ncols;
2386       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2387     }
2388     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2389     cols = uj;
2390     for (i = 0; i < am; i++) {
2391       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2392       ncols = ai[i + 1] - a->diag[i] - 1;
2393       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2394       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2395     }
2396   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2397     PetscCall(ISGetIndices(iperm, &riip));
2398     PetscCall(ISGetIndices(perm, &rip));
2399 
2400     /* initialization */
2401     PetscCall(PetscMalloc1(am + 1, &ajtmp));
2402 
2403     /* jl: linked list for storing indices of the pivot rows
2404        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2405     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2406     for (i = 0; i < am; i++) {
2407       jl[i] = am;
2408       il[i] = 0;
2409     }
2410 
2411     /* create and initialize a linked list for storing column indices of the active row k */
2412     nlnk = am + 1;
2413     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2414 
2415     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2416     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2417     current_space = free_space;
2418     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2419     current_space_lvl = free_space_lvl;
2420 
2421     for (k = 0; k < am; k++) { /* for each active row k */
2422       /* initialize lnk by the column indices of row rip[k] of A */
2423       nzk   = 0;
2424       ncols = ai[rip[k] + 1] - ai[rip[k]];
2425       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2426       ncols_upper = 0;
2427       for (j = 0; j < ncols; j++) {
2428         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2429         if (riip[i] >= k) {         /* only take upper triangular entry */
2430           ajtmp[ncols_upper] = i;
2431           ncols_upper++;
2432         }
2433       }
2434       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2435       nzk += nlnk;
2436 
2437       /* update lnk by computing fill-in for each pivot row to be merged in */
2438       prow = jl[k]; /* 1st pivot row */
2439 
2440       while (prow < k) {
2441         nextprow = jl[prow];
2442 
2443         /* merge prow into k-th row */
2444         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2445         jmax  = ui[prow + 1];
2446         ncols = jmax - jmin;
2447         i     = jmin - ui[prow];
2448         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2449         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2450         j     = *(uj - 1);
2451         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2452         nzk += nlnk;
2453 
2454         /* update il and jl for prow */
2455         if (jmin < jmax) {
2456           il[prow] = jmin;
2457           j        = *cols;
2458           jl[prow] = jl[j];
2459           jl[j]    = prow;
2460         }
2461         prow = nextprow;
2462       }
2463 
2464       /* if free space is not available, make more free space */
2465       if (current_space->local_remaining < nzk) {
2466         i = am - k + 1;                                    /* num of unfactored rows */
2467         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2468         PetscCall(PetscFreeSpaceGet(i, &current_space));
2469         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2470         reallocs++;
2471       }
2472 
2473       /* copy data into free_space and free_space_lvl, then initialize lnk */
2474       PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2475       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2476 
2477       /* add the k-th row into il and jl */
2478       if (nzk > 1) {
2479         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2480         jl[k] = jl[i];
2481         jl[i] = k;
2482         il[k] = ui[k] + 1;
2483       }
2484       uj_ptr[k]     = current_space->array;
2485       uj_lvl_ptr[k] = current_space_lvl->array;
2486 
2487       current_space->array += nzk;
2488       current_space->local_used += nzk;
2489       current_space->local_remaining -= nzk;
2490 
2491       current_space_lvl->array += nzk;
2492       current_space_lvl->local_used += nzk;
2493       current_space_lvl->local_remaining -= nzk;
2494 
2495       ui[k + 1] = ui[k] + nzk;
2496     }
2497 
2498     PetscCall(ISRestoreIndices(perm, &rip));
2499     PetscCall(ISRestoreIndices(iperm, &riip));
2500     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2501     PetscCall(PetscFree(ajtmp));
2502 
2503     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2504     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2505     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor  */
2506     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2507     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2508 
2509   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2510 
2511   /* put together the new matrix in MATSEQSBAIJ format */
2512   b               = (Mat_SeqSBAIJ *)(fact)->data;
2513   b->singlemalloc = PETSC_FALSE;
2514 
2515   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2516 
2517   b->j         = uj;
2518   b->i         = ui;
2519   b->diag      = udiag;
2520   b->free_diag = PETSC_TRUE;
2521   b->ilen      = NULL;
2522   b->imax      = NULL;
2523   b->row       = perm;
2524   b->col       = perm;
2525   PetscCall(PetscObjectReference((PetscObject)perm));
2526   PetscCall(PetscObjectReference((PetscObject)perm));
2527   b->icol          = iperm;
2528   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2529 
2530   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2531 
2532   b->maxnz = b->nz = ui[am];
2533   b->free_a        = PETSC_TRUE;
2534   b->free_ij       = PETSC_TRUE;
2535 
2536   fact->info.factor_mallocs   = reallocs;
2537   fact->info.fill_ratio_given = fill;
2538   if (ai[am] != 0) {
2539     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2540     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2541   } else {
2542     fact->info.fill_ratio_needed = 0.0;
2543   }
2544 #if defined(PETSC_USE_INFO)
2545   if (ai[am] != 0) {
2546     PetscReal af = fact->info.fill_ratio_needed;
2547     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2548     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2549     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2550   } else {
2551     PetscCall(PetscInfo(A, "Empty matrix\n"));
2552   }
2553 #endif
2554   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2555   PetscFunctionReturn(PETSC_SUCCESS);
2556 }
2557 
2558 #if 0
2559 // unused
2560 static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2561 {
2562   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2563   Mat_SeqSBAIJ      *b;
2564   PetscBool          perm_identity, missing;
2565   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2566   const PetscInt    *rip, *riip;
2567   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2568   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2569   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2570   PetscReal          fill = info->fill, levels = info->levels;
2571   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2572   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2573   PetscBT            lnkbt;
2574   IS                 iperm;
2575 
2576   PetscFunctionBegin;
2577   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2578   PetscCall(MatMissingDiagonal(A, &missing, &d));
2579   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2580   PetscCall(ISIdentity(perm, &perm_identity));
2581   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2582 
2583   PetscCall(PetscMalloc1(am + 1, &ui));
2584   PetscCall(PetscMalloc1(am + 1, &udiag));
2585   ui[0] = 0;
2586 
2587   /* ICC(0) without matrix ordering: simply copies fill pattern */
2588   if (!levels && perm_identity) {
2589     for (i = 0; i < am; i++) {
2590       ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
2591       udiag[i]  = ui[i];
2592     }
2593     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2594     cols = uj;
2595     for (i = 0; i < am; i++) {
2596       aj    = a->j + a->diag[i];
2597       ncols = ui[i + 1] - ui[i];
2598       for (j = 0; j < ncols; j++) *cols++ = *aj++;
2599     }
2600   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2601     PetscCall(ISGetIndices(iperm, &riip));
2602     PetscCall(ISGetIndices(perm, &rip));
2603 
2604     /* initialization */
2605     PetscCall(PetscMalloc1(am + 1, &ajtmp));
2606 
2607     /* jl: linked list for storing indices of the pivot rows
2608        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2609     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2610     for (i = 0; i < am; i++) {
2611       jl[i] = am;
2612       il[i] = 0;
2613     }
2614 
2615     /* create and initialize a linked list for storing column indices of the active row k */
2616     nlnk = am + 1;
2617     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2618 
2619     /* initial FreeSpace size is fill*(ai[am]+1) */
2620     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2621     current_space = free_space;
2622     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl));
2623     current_space_lvl = free_space_lvl;
2624 
2625     for (k = 0; k < am; k++) { /* for each active row k */
2626       /* initialize lnk by the column indices of row rip[k] of A */
2627       nzk   = 0;
2628       ncols = ai[rip[k] + 1] - ai[rip[k]];
2629       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2630       ncols_upper = 0;
2631       for (j = 0; j < ncols; j++) {
2632         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2633         if (riip[i] >= k) {         /* only take upper triangular entry */
2634           ajtmp[ncols_upper] = i;
2635           ncols_upper++;
2636         }
2637       }
2638       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2639       nzk += nlnk;
2640 
2641       /* update lnk by computing fill-in for each pivot row to be merged in */
2642       prow = jl[k]; /* 1st pivot row */
2643 
2644       while (prow < k) {
2645         nextprow = jl[prow];
2646 
2647         /* merge prow into k-th row */
2648         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2649         jmax  = ui[prow + 1];
2650         ncols = jmax - jmin;
2651         i     = jmin - ui[prow];
2652         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2653         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2654         j     = *(uj - 1);
2655         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2656         nzk += nlnk;
2657 
2658         /* update il and jl for prow */
2659         if (jmin < jmax) {
2660           il[prow] = jmin;
2661           j        = *cols;
2662           jl[prow] = jl[j];
2663           jl[j]    = prow;
2664         }
2665         prow = nextprow;
2666       }
2667 
2668       /* if free space is not available, make more free space */
2669       if (current_space->local_remaining < nzk) {
2670         i = am - k + 1;                                    /* num of unfactored rows */
2671         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2672         PetscCall(PetscFreeSpaceGet(i, &current_space));
2673         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2674         reallocs++;
2675       }
2676 
2677       /* copy data into free_space and free_space_lvl, then initialize lnk */
2678       PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2679       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2680 
2681       /* add the k-th row into il and jl */
2682       if (nzk > 1) {
2683         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2684         jl[k] = jl[i];
2685         jl[i] = k;
2686         il[k] = ui[k] + 1;
2687       }
2688       uj_ptr[k]     = current_space->array;
2689       uj_lvl_ptr[k] = current_space_lvl->array;
2690 
2691       current_space->array += nzk;
2692       current_space->local_used += nzk;
2693       current_space->local_remaining -= nzk;
2694 
2695       current_space_lvl->array += nzk;
2696       current_space_lvl->local_used += nzk;
2697       current_space_lvl->local_remaining -= nzk;
2698 
2699       ui[k + 1] = ui[k] + nzk;
2700     }
2701 
2702   #if defined(PETSC_USE_INFO)
2703     if (ai[am] != 0) {
2704       PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
2705       PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2706       PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2707       PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2708     } else {
2709       PetscCall(PetscInfo(A, "Empty matrix\n"));
2710     }
2711   #endif
2712 
2713     PetscCall(ISRestoreIndices(perm, &rip));
2714     PetscCall(ISRestoreIndices(iperm, &riip));
2715     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2716     PetscCall(PetscFree(ajtmp));
2717 
2718     /* destroy list of free space and other temporary array(s) */
2719     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2720     PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
2721     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2722     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2723 
2724   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2725 
2726   /* put together the new matrix in MATSEQSBAIJ format */
2727 
2728   b               = (Mat_SeqSBAIJ *)fact->data;
2729   b->singlemalloc = PETSC_FALSE;
2730 
2731   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2732 
2733   b->j         = uj;
2734   b->i         = ui;
2735   b->diag      = udiag;
2736   b->free_diag = PETSC_TRUE;
2737   b->ilen      = NULL;
2738   b->imax      = NULL;
2739   b->row       = perm;
2740   b->col       = perm;
2741 
2742   PetscCall(PetscObjectReference((PetscObject)perm));
2743   PetscCall(PetscObjectReference((PetscObject)perm));
2744 
2745   b->icol          = iperm;
2746   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2747   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2748   b->maxnz = b->nz = ui[am];
2749   b->free_a        = PETSC_TRUE;
2750   b->free_ij       = PETSC_TRUE;
2751 
2752   fact->info.factor_mallocs   = reallocs;
2753   fact->info.fill_ratio_given = fill;
2754   if (ai[am] != 0) {
2755     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
2756   } else {
2757     fact->info.fill_ratio_needed = 0.0;
2758   }
2759   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2760   PetscFunctionReturn(PETSC_SUCCESS);
2761 }
2762 #endif
2763 
2764 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2765 {
2766   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2767   Mat_SeqSBAIJ      *b;
2768   PetscBool          perm_identity, missing;
2769   PetscReal          fill = info->fill;
2770   const PetscInt    *rip, *riip;
2771   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2772   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2773   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2774   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2775   PetscBT            lnkbt;
2776   IS                 iperm;
2777 
2778   PetscFunctionBegin;
2779   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2780   PetscCall(MatMissingDiagonal(A, &missing, &i));
2781   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2782 
2783   /* check whether perm is the identity mapping */
2784   PetscCall(ISIdentity(perm, &perm_identity));
2785   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2786   PetscCall(ISGetIndices(iperm, &riip));
2787   PetscCall(ISGetIndices(perm, &rip));
2788 
2789   /* initialization */
2790   PetscCall(PetscMalloc1(am + 1, &ui));
2791   PetscCall(PetscMalloc1(am + 1, &udiag));
2792   ui[0] = 0;
2793 
2794   /* jl: linked list for storing indices of the pivot rows
2795      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2796   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2797   for (i = 0; i < am; i++) {
2798     jl[i] = am;
2799     il[i] = 0;
2800   }
2801 
2802   /* create and initialize a linked list for storing column indices of the active row k */
2803   nlnk = am + 1;
2804   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2805 
2806   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2807   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2808   current_space = free_space;
2809 
2810   for (k = 0; k < am; k++) { /* for each active row k */
2811     /* initialize lnk by the column indices of row rip[k] of A */
2812     nzk   = 0;
2813     ncols = ai[rip[k] + 1] - ai[rip[k]];
2814     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2815     ncols_upper = 0;
2816     for (j = 0; j < ncols; j++) {
2817       i = riip[*(aj + ai[rip[k]] + j)];
2818       if (i >= k) { /* only take upper triangular entry */
2819         cols[ncols_upper] = i;
2820         ncols_upper++;
2821       }
2822     }
2823     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2824     nzk += nlnk;
2825 
2826     /* update lnk by computing fill-in for each pivot row to be merged in */
2827     prow = jl[k]; /* 1st pivot row */
2828 
2829     while (prow < k) {
2830       nextprow = jl[prow];
2831       /* merge prow into k-th row */
2832       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2833       jmax   = ui[prow + 1];
2834       ncols  = jmax - jmin;
2835       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2836       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2837       nzk += nlnk;
2838 
2839       /* update il and jl for prow */
2840       if (jmin < jmax) {
2841         il[prow] = jmin;
2842         j        = *uj_ptr;
2843         jl[prow] = jl[j];
2844         jl[j]    = prow;
2845       }
2846       prow = nextprow;
2847     }
2848 
2849     /* if free space is not available, make more free space */
2850     if (current_space->local_remaining < nzk) {
2851       i = am - k + 1;                                    /* num of unfactored rows */
2852       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2853       PetscCall(PetscFreeSpaceGet(i, &current_space));
2854       reallocs++;
2855     }
2856 
2857     /* copy data into free space, then initialize lnk */
2858     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
2859 
2860     /* add the k-th row into il and jl */
2861     if (nzk > 1) {
2862       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2863       jl[k] = jl[i];
2864       jl[i] = k;
2865       il[k] = ui[k] + 1;
2866     }
2867     ui_ptr[k] = current_space->array;
2868 
2869     current_space->array += nzk;
2870     current_space->local_used += nzk;
2871     current_space->local_remaining -= nzk;
2872 
2873     ui[k + 1] = ui[k] + nzk;
2874   }
2875 
2876   PetscCall(ISRestoreIndices(perm, &rip));
2877   PetscCall(ISRestoreIndices(iperm, &riip));
2878   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
2879 
2880   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2881   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2882   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2883   PetscCall(PetscLLDestroy(lnk, lnkbt));
2884 
2885   /* put together the new matrix in MATSEQSBAIJ format */
2886 
2887   b               = (Mat_SeqSBAIJ *)fact->data;
2888   b->singlemalloc = PETSC_FALSE;
2889   b->free_a       = PETSC_TRUE;
2890   b->free_ij      = PETSC_TRUE;
2891 
2892   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2893 
2894   b->j         = uj;
2895   b->i         = ui;
2896   b->diag      = udiag;
2897   b->free_diag = PETSC_TRUE;
2898   b->ilen      = NULL;
2899   b->imax      = NULL;
2900   b->row       = perm;
2901   b->col       = perm;
2902 
2903   PetscCall(PetscObjectReference((PetscObject)perm));
2904   PetscCall(PetscObjectReference((PetscObject)perm));
2905 
2906   b->icol          = iperm;
2907   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2908 
2909   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2910 
2911   b->maxnz = b->nz = ui[am];
2912 
2913   fact->info.factor_mallocs   = reallocs;
2914   fact->info.fill_ratio_given = fill;
2915   if (ai[am] != 0) {
2916     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2917     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2918   } else {
2919     fact->info.fill_ratio_needed = 0.0;
2920   }
2921 #if defined(PETSC_USE_INFO)
2922   if (ai[am] != 0) {
2923     PetscReal af = fact->info.fill_ratio_needed;
2924     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2925     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2926     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2927   } else {
2928     PetscCall(PetscInfo(A, "Empty matrix\n"));
2929   }
2930 #endif
2931   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2932   PetscFunctionReturn(PETSC_SUCCESS);
2933 }
2934 
2935 #if 0
2936 // unused
2937 static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2938 {
2939   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2940   Mat_SeqSBAIJ      *b;
2941   PetscBool          perm_identity, missing;
2942   PetscReal          fill = info->fill;
2943   const PetscInt    *rip, *riip;
2944   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2945   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2946   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
2947   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2948   PetscBT            lnkbt;
2949   IS                 iperm;
2950 
2951   PetscFunctionBegin;
2952   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2953   PetscCall(MatMissingDiagonal(A, &missing, &i));
2954   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2955 
2956   /* check whether perm is the identity mapping */
2957   PetscCall(ISIdentity(perm, &perm_identity));
2958   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2959   PetscCall(ISGetIndices(iperm, &riip));
2960   PetscCall(ISGetIndices(perm, &rip));
2961 
2962   /* initialization */
2963   PetscCall(PetscMalloc1(am + 1, &ui));
2964   ui[0] = 0;
2965 
2966   /* jl: linked list for storing indices of the pivot rows
2967      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2968   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2969   for (i = 0; i < am; i++) {
2970     jl[i] = am;
2971     il[i] = 0;
2972   }
2973 
2974   /* create and initialize a linked list for storing column indices of the active row k */
2975   nlnk = am + 1;
2976   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2977 
2978   /* initial FreeSpace size is fill*(ai[am]+1) */
2979   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2980   current_space = free_space;
2981 
2982   for (k = 0; k < am; k++) { /* for each active row k */
2983     /* initialize lnk by the column indices of row rip[k] of A */
2984     nzk   = 0;
2985     ncols = ai[rip[k] + 1] - ai[rip[k]];
2986     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2987     ncols_upper = 0;
2988     for (j = 0; j < ncols; j++) {
2989       i = riip[*(aj + ai[rip[k]] + j)];
2990       if (i >= k) { /* only take upper triangular entry */
2991         cols[ncols_upper] = i;
2992         ncols_upper++;
2993       }
2994     }
2995     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2996     nzk += nlnk;
2997 
2998     /* update lnk by computing fill-in for each pivot row to be merged in */
2999     prow = jl[k]; /* 1st pivot row */
3000 
3001     while (prow < k) {
3002       nextprow = jl[prow];
3003       /* merge prow into k-th row */
3004       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3005       jmax   = ui[prow + 1];
3006       ncols  = jmax - jmin;
3007       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3008       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
3009       nzk += nlnk;
3010 
3011       /* update il and jl for prow */
3012       if (jmin < jmax) {
3013         il[prow] = jmin;
3014         j        = *uj_ptr;
3015         jl[prow] = jl[j];
3016         jl[j]    = prow;
3017       }
3018       prow = nextprow;
3019     }
3020 
3021     /* if free space is not available, make more free space */
3022     if (current_space->local_remaining < nzk) {
3023       i = am - k + 1;                     /* num of unfactored rows */
3024       i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3025       PetscCall(PetscFreeSpaceGet(i, &current_space));
3026       reallocs++;
3027     }
3028 
3029     /* copy data into free space, then initialize lnk */
3030     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
3031 
3032     /* add the k-th row into il and jl */
3033     if (nzk - 1 > 0) {
3034       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3035       jl[k] = jl[i];
3036       jl[i] = k;
3037       il[k] = ui[k] + 1;
3038     }
3039     ui_ptr[k] = current_space->array;
3040 
3041     current_space->array += nzk;
3042     current_space->local_used += nzk;
3043     current_space->local_remaining -= nzk;
3044 
3045     ui[k + 1] = ui[k] + nzk;
3046   }
3047 
3048   #if defined(PETSC_USE_INFO)
3049   if (ai[am] != 0) {
3050     PetscReal af = (PetscReal)(ui[am]) / ((PetscReal)ai[am]);
3051     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
3052     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
3053     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
3054   } else {
3055     PetscCall(PetscInfo(A, "Empty matrix\n"));
3056   }
3057   #endif
3058 
3059   PetscCall(ISRestoreIndices(perm, &rip));
3060   PetscCall(ISRestoreIndices(iperm, &riip));
3061   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
3062 
3063   /* destroy list of free space and other temporary array(s) */
3064   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
3065   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
3066   PetscCall(PetscLLDestroy(lnk, lnkbt));
3067 
3068   /* put together the new matrix in MATSEQSBAIJ format */
3069 
3070   b               = (Mat_SeqSBAIJ *)fact->data;
3071   b->singlemalloc = PETSC_FALSE;
3072   b->free_a       = PETSC_TRUE;
3073   b->free_ij      = PETSC_TRUE;
3074 
3075   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
3076 
3077   b->j    = uj;
3078   b->i    = ui;
3079   b->diag = NULL;
3080   b->ilen = NULL;
3081   b->imax = NULL;
3082   b->row  = perm;
3083   b->col  = perm;
3084 
3085   PetscCall(PetscObjectReference((PetscObject)perm));
3086   PetscCall(PetscObjectReference((PetscObject)perm));
3087 
3088   b->icol          = iperm;
3089   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3090 
3091   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
3092   b->maxnz = b->nz = ui[am];
3093 
3094   fact->info.factor_mallocs   = reallocs;
3095   fact->info.fill_ratio_given = fill;
3096   if (ai[am] != 0) {
3097     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
3098   } else {
3099     fact->info.fill_ratio_needed = 0.0;
3100   }
3101   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3102   PetscFunctionReturn(PETSC_SUCCESS);
3103 }
3104 #endif
3105 
3106 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
3107 {
3108   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
3109   PetscInt           n  = A->rmap->n;
3110   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3111   PetscScalar       *x, sum;
3112   const PetscScalar *b;
3113   const MatScalar   *aa = a->a, *v;
3114   PetscInt           i, nz;
3115 
3116   PetscFunctionBegin;
3117   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3118 
3119   PetscCall(VecGetArrayRead(bb, &b));
3120   PetscCall(VecGetArrayWrite(xx, &x));
3121 
3122   /* forward solve the lower triangular */
3123   x[0] = b[0];
3124   v    = aa;
3125   vi   = aj;
3126   for (i = 1; i < n; i++) {
3127     nz  = ai[i + 1] - ai[i];
3128     sum = b[i];
3129     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3130     v += nz;
3131     vi += nz;
3132     x[i] = sum;
3133   }
3134 
3135   /* backward solve the upper triangular */
3136   for (i = n - 1; i >= 0; i--) {
3137     v   = aa + adiag[i + 1] + 1;
3138     vi  = aj + adiag[i + 1] + 1;
3139     nz  = adiag[i] - adiag[i + 1] - 1;
3140     sum = x[i];
3141     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3142     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3143   }
3144 
3145   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3146   PetscCall(VecRestoreArrayRead(bb, &b));
3147   PetscCall(VecRestoreArrayWrite(xx, &x));
3148   PetscFunctionReturn(PETSC_SUCCESS);
3149 }
3150 
3151 PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
3152 {
3153   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
3154   IS                 iscol = a->col, isrow = a->row;
3155   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3156   const PetscInt    *rout, *cout, *r, *c;
3157   PetscScalar       *x, *tmp, sum;
3158   const PetscScalar *b;
3159   const MatScalar   *aa = a->a, *v;
3160 
3161   PetscFunctionBegin;
3162   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3163 
3164   PetscCall(VecGetArrayRead(bb, &b));
3165   PetscCall(VecGetArrayWrite(xx, &x));
3166   tmp = a->solve_work;
3167 
3168   PetscCall(ISGetIndices(isrow, &rout));
3169   r = rout;
3170   PetscCall(ISGetIndices(iscol, &cout));
3171   c = cout;
3172 
3173   /* forward solve the lower triangular */
3174   tmp[0] = b[r[0]];
3175   v      = aa;
3176   vi     = aj;
3177   for (i = 1; i < n; i++) {
3178     nz  = ai[i + 1] - ai[i];
3179     sum = b[r[i]];
3180     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3181     tmp[i] = sum;
3182     v += nz;
3183     vi += nz;
3184   }
3185 
3186   /* backward solve the upper triangular */
3187   for (i = n - 1; i >= 0; i--) {
3188     v   = aa + adiag[i + 1] + 1;
3189     vi  = aj + adiag[i + 1] + 1;
3190     nz  = adiag[i] - adiag[i + 1] - 1;
3191     sum = tmp[i];
3192     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3193     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3194   }
3195 
3196   PetscCall(ISRestoreIndices(isrow, &rout));
3197   PetscCall(ISRestoreIndices(iscol, &cout));
3198   PetscCall(VecRestoreArrayRead(bb, &b));
3199   PetscCall(VecRestoreArrayWrite(xx, &x));
3200   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3201   PetscFunctionReturn(PETSC_SUCCESS);
3202 }
3203 
3204 #if 0
3205 // unused
3206 /*
3207     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3208 */
3209 static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact)
3210 {
3211   Mat             B = *fact;
3212   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b;
3213   IS              isicol;
3214   const PetscInt *r, *ic;
3215   PetscInt        i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3216   PetscInt       *bi, *bj, *bdiag, *bdiag_rev;
3217   PetscInt        row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
3218   PetscInt        nlnk, *lnk;
3219   PetscBT         lnkbt;
3220   PetscBool       row_identity, icol_identity;
3221   MatScalar      *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
3222   const PetscInt *ics;
3223   PetscInt        j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3224   PetscReal       dt = info->dt, shift = info->shiftamount;
3225   PetscInt        dtcount = (PetscInt)info->dtcount, nnz_max;
3226   PetscBool       missing;
3227 
3228   PetscFunctionBegin;
3229   if (dt == PETSC_DEFAULT) dt = 0.005;
3230   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);
3231 
3232   /* ------- symbolic factorization, can be reused ---------*/
3233   PetscCall(MatMissingDiagonal(A, &missing, &i));
3234   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
3235   adiag = a->diag;
3236 
3237   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
3238 
3239   /* bdiag is location of diagonal in factor */
3240   PetscCall(PetscMalloc1(n + 1, &bdiag));     /* becomes b->diag */
3241   PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */
3242 
3243   /* allocate row pointers bi */
3244   PetscCall(PetscMalloc1(2 * n + 2, &bi));
3245 
3246   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3247   if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
3248   nnz_max = ai[n] + 2 * n * dtcount + 2;
3249 
3250   PetscCall(PetscMalloc1(nnz_max + 1, &bj));
3251   PetscCall(PetscMalloc1(nnz_max + 1, &ba));
3252 
3253   /* put together the new matrix */
3254   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3255   b = (Mat_SeqAIJ *)B->data;
3256 
3257   b->free_a       = PETSC_TRUE;
3258   b->free_ij      = PETSC_TRUE;
3259   b->singlemalloc = PETSC_FALSE;
3260 
3261   b->a    = ba;
3262   b->j    = bj;
3263   b->i    = bi;
3264   b->diag = bdiag;
3265   b->ilen = NULL;
3266   b->imax = NULL;
3267   b->row  = isrow;
3268   b->col  = iscol;
3269   PetscCall(PetscObjectReference((PetscObject)isrow));
3270   PetscCall(PetscObjectReference((PetscObject)iscol));
3271   b->icol = isicol;
3272 
3273   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
3274   b->maxnz = nnz_max;
3275 
3276   B->factortype            = MAT_FACTOR_ILUDT;
3277   B->info.factor_mallocs   = 0;
3278   B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
3279   /* ------- end of symbolic factorization ---------*/
3280 
3281   PetscCall(ISGetIndices(isrow, &r));
3282   PetscCall(ISGetIndices(isicol, &ic));
3283   ics = ic;
3284 
3285   /* linked list for storing column indices of the active row */
3286   nlnk = n + 1;
3287   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
3288 
3289   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3290   PetscCall(PetscMalloc2(n, &im, n, &jtmp));
3291   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3292   PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp));
3293   PetscCall(PetscArrayzero(rtmp, n));
3294 
3295   bi[0]         = 0;
3296   bdiag[0]      = nnz_max - 1; /* location of diag[0] in factor B */
3297   bdiag_rev[n]  = bdiag[0];
3298   bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3299   for (i = 0; i < n; i++) {
3300     /* copy initial fill into linked list */
3301     nzi = ai[r[i] + 1] - ai[r[i]];
3302     PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
3303     nzi_al = adiag[r[i]] - ai[r[i]];
3304     nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3305     ajtmp  = aj + ai[r[i]];
3306     PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt));
3307 
3308     /* load in initial (unfactored row) */
3309     aatmp = a->a + ai[r[i]];
3310     for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++;
3311 
3312     /* add pivot rows into linked list */
3313     row = lnk[n];
3314     while (row < i) {
3315       nzi_bl = bi[row + 1] - bi[row] + 1;
3316       bjtmp  = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
3317       PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im));
3318       nzi += nlnk;
3319       row = lnk[row];
3320     }
3321 
3322     /* copy data from lnk into jtmp, then initialize lnk */
3323     PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt));
3324 
3325     /* numerical factorization */
3326     bjtmp = jtmp;
3327     row   = *bjtmp++; /* 1st pivot row */
3328     while (row < i) {
3329       pc         = rtmp + row;
3330       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3331       multiplier = (*pc) * (*pv);
3332       *pc        = multiplier;
3333       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3334         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3335         pv = ba + bdiag[row + 1] + 1;
3336         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3337         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3338         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3339       }
3340       row = *bjtmp++;
3341     }
3342 
3343     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3344     diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3345     nzi_bl   = 0;
3346     j        = 0;
3347     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3348       vtmp[j]       = rtmp[jtmp[j]];
3349       rtmp[jtmp[j]] = 0.0;
3350       nzi_bl++;
3351       j++;
3352     }
3353     nzi_bu = nzi - nzi_bl - 1;
3354     while (j < nzi) {
3355       vtmp[j]       = rtmp[jtmp[j]];
3356       rtmp[jtmp[j]] = 0.0;
3357       j++;
3358     }
3359 
3360     bjtmp = bj + bi[i];
3361     batmp = ba + bi[i];
3362     /* apply level dropping rule to L part */
3363     ncut = nzi_al + dtcount;
3364     if (ncut < nzi_bl) {
3365       PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp));
3366       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp));
3367     } else {
3368       ncut = nzi_bl;
3369     }
3370     for (j = 0; j < ncut; j++) {
3371       bjtmp[j] = jtmp[j];
3372       batmp[j] = vtmp[j];
3373     }
3374     bi[i + 1] = bi[i] + ncut;
3375     nzi       = ncut + 1;
3376 
3377     /* apply level dropping rule to U part */
3378     ncut = nzi_au + dtcount;
3379     if (ncut < nzi_bu) {
3380       PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1));
3381       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1));
3382     } else {
3383       ncut = nzi_bu;
3384     }
3385     nzi += ncut;
3386 
3387     /* mark bdiagonal */
3388     bdiag[i + 1]         = bdiag[i] - (ncut + 1);
3389     bdiag_rev[n - i - 1] = bdiag[i + 1];
3390     bi[2 * n - i]        = bi[2 * n - i + 1] - (ncut + 1);
3391     bjtmp                = bj + bdiag[i];
3392     batmp                = ba + bdiag[i];
3393     *bjtmp               = i;
3394     *batmp               = diag_tmp; /* rtmp[i]; */
3395     if (*batmp == 0.0) *batmp = dt + shift;
3396     *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */
3397 
3398     bjtmp = bj + bdiag[i + 1] + 1;
3399     batmp = ba + bdiag[i + 1] + 1;
3400     for (k = 0; k < ncut; k++) {
3401       bjtmp[k] = jtmp[nzi_bl + 1 + k];
3402       batmp[k] = vtmp[nzi_bl + 1 + k];
3403     }
3404 
3405     im[i] = nzi; /* used by PetscLLAddSortedLU() */
3406   }              /* for (i=0; i<n; i++) */
3407   PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]);
3408 
3409   PetscCall(ISRestoreIndices(isrow, &r));
3410   PetscCall(ISRestoreIndices(isicol, &ic));
3411 
3412   PetscCall(PetscLLDestroy(lnk, lnkbt));
3413   PetscCall(PetscFree2(im, jtmp));
3414   PetscCall(PetscFree2(rtmp, vtmp));
3415   PetscCall(PetscFree(bdiag_rev));
3416 
3417   PetscCall(PetscLogFlops(B->cmap->n));
3418   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3419 
3420   PetscCall(ISIdentity(isrow, &row_identity));
3421   PetscCall(ISIdentity(isicol, &icol_identity));
3422   if (row_identity && icol_identity) {
3423     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3424   } else {
3425     B->ops->solve = MatSolve_SeqAIJ;
3426   }
3427 
3428   B->ops->solveadd          = NULL;
3429   B->ops->solvetranspose    = NULL;
3430   B->ops->solvetransposeadd = NULL;
3431   B->ops->matsolve          = NULL;
3432   B->assembled              = PETSC_TRUE;
3433   B->preallocated           = PETSC_TRUE;
3434   PetscFunctionReturn(PETSC_SUCCESS);
3435 }
3436 
3437 /* a wrapper of MatILUDTFactor_SeqAIJ() */
3438 /*
3439     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3440 */
3441 
3442 static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
3443 {
3444   PetscFunctionBegin;
3445   PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact));
3446   PetscFunctionReturn(PETSC_SUCCESS);
3447 }
3448 
3449 /*
3450    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3451    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3452 */
3453 /*
3454     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3455 */
3456 
3457 static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info)
3458 {
3459   Mat             C = fact;
3460   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
3461   IS              isrow = b->row, isicol = b->icol;
3462   const PetscInt *r, *ic, *ics;
3463   PetscInt        i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3464   PetscInt       *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
3465   MatScalar      *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3466   PetscReal       dt = info->dt, shift = info->shiftamount;
3467   PetscBool       row_identity, col_identity;
3468 
3469   PetscFunctionBegin;
3470   PetscCall(ISGetIndices(isrow, &r));
3471   PetscCall(ISGetIndices(isicol, &ic));
3472   PetscCall(PetscMalloc1(n + 1, &rtmp));
3473   ics = ic;
3474 
3475   for (i = 0; i < n; i++) {
3476     /* initialize rtmp array */
3477     nzl   = bi[i + 1] - bi[i]; /* num of nozeros in L(i,:) */
3478     bjtmp = bj + bi[i];
3479     for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3480     rtmp[i] = 0.0;
3481     nzu     = bdiag[i] - bdiag[i + 1]; /* num of nozeros in U(i,:) */
3482     bjtmp   = bj + bdiag[i + 1] + 1;
3483     for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;
3484 
3485     /* load in initial unfactored row of A */
3486     nz    = ai[r[i] + 1] - ai[r[i]];
3487     ajtmp = aj + ai[r[i]];
3488     v     = aa + ai[r[i]];
3489     for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j];
3490 
3491     /* numerical factorization */
3492     bjtmp = bj + bi[i];        /* point to 1st entry of L(i,:) */
3493     nzl   = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3494     k     = 0;
3495     while (k < nzl) {
3496       row        = *bjtmp++;
3497       pc         = rtmp + row;
3498       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3499       multiplier = (*pc) * (*pv);
3500       *pc        = multiplier;
3501       if (PetscAbsScalar(multiplier) > dt) {
3502         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3503         pv = b->a + bdiag[row + 1] + 1;
3504         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3505         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3506         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3507       }
3508       k++;
3509     }
3510 
3511     /* finished row so stick it into b->a */
3512     /* L-part */
3513     pv  = b->a + bi[i];
3514     pj  = bj + bi[i];
3515     nzl = bi[i + 1] - bi[i];
3516     for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]];
3517 
3518     /* diagonal: invert diagonal entries for simpler triangular solves */
3519     if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3520     b->a[bdiag[i]] = 1.0 / rtmp[i];
3521 
3522     /* U-part */
3523     pv  = b->a + bdiag[i + 1] + 1;
3524     pj  = bj + bdiag[i + 1] + 1;
3525     nzu = bdiag[i] - bdiag[i + 1] - 1;
3526     for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]];
3527   }
3528 
3529   PetscCall(PetscFree(rtmp));
3530   PetscCall(ISRestoreIndices(isicol, &ic));
3531   PetscCall(ISRestoreIndices(isrow, &r));
3532 
3533   PetscCall(ISIdentity(isrow, &row_identity));
3534   PetscCall(ISIdentity(isicol, &col_identity));
3535   if (row_identity && col_identity) {
3536     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3537   } else {
3538     C->ops->solve = MatSolve_SeqAIJ;
3539   }
3540   C->ops->solveadd          = NULL;
3541   C->ops->solvetranspose    = NULL;
3542   C->ops->solvetransposeadd = NULL;
3543   C->ops->matsolve          = NULL;
3544   C->assembled              = PETSC_TRUE;
3545   C->preallocated           = PETSC_TRUE;
3546 
3547   PetscCall(PetscLogFlops(C->cmap->n));
3548   PetscFunctionReturn(PETSC_SUCCESS);
3549 }
3550 #endif
3551