xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact.c (revision 5d7a6ebe9dde080aedbe86be0085708de8f97bb7)
1 
2 #include <../src/mat/impls/baij/seq/baij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petsc/private/kernels/blockinvert.h>
5 #include <petscis.h>
6 
7 PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
8 {
9   Mat_SeqSBAIJ *fact = (Mat_SeqSBAIJ *)F->data;
10   MatScalar    *dd   = fact->a;
11   PetscInt      mbs = fact->mbs, bs = F->rmap->bs, i, nneg_tmp, npos_tmp, *fi = fact->diag;
12 
13   PetscFunctionBegin;
14   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for bs: %" PetscInt_FMT " >1 yet", bs);
15 
16   nneg_tmp = 0;
17   npos_tmp = 0;
18   for (i = 0; i < mbs; i++) {
19     if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++;
20     else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++;
21     fi++;
22   }
23   if (nneg) *nneg = nneg_tmp;
24   if (npos) *npos = npos_tmp;
25   if (nzero) *nzero = mbs - nneg_tmp - npos_tmp;
26   PetscFunctionReturn(PETSC_SUCCESS);
27 }
28 
29 /*
30   Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
31   Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad.
32 */
33 static PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F, Mat A, IS perm, const MatFactorInfo *info)
34 {
35   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b;
36   const PetscInt *rip, *ai, *aj;
37   PetscInt        i, mbs = a->mbs, *jutmp, bs = A->rmap->bs, bs2 = a->bs2;
38   PetscInt        m, reallocs = 0, prow;
39   PetscInt       *jl, *q, jmin, jmax, juidx, nzk, qm, *iu, *ju, k, j, vj, umax, maxadd;
40   PetscReal       f = info->fill;
41   PetscBool       perm_identity;
42 
43   PetscFunctionBegin;
44   /* check whether perm is the identity mapping */
45   PetscCall(ISIdentity(perm, &perm_identity));
46   PetscCall(ISGetIndices(perm, &rip));
47 
48   if (perm_identity) { /* without permutation */
49     a->permute = PETSC_FALSE;
50 
51     ai = a->i;
52     aj = a->j;
53   } else { /* non-trivial permutation */
54     a->permute = PETSC_TRUE;
55 
56     PetscCall(MatReorderingSeqSBAIJ(A, perm));
57 
58     ai = a->inew;
59     aj = a->jnew;
60   }
61 
62   /* initialization */
63   PetscCall(PetscMalloc1(mbs + 1, &iu));
64   umax = (PetscInt)(f * ai[mbs] + 1);
65   umax += mbs + 1;
66   PetscCall(PetscMalloc1(umax, &ju));
67   iu[0] = mbs + 1;
68   juidx = mbs + 1; /* index for ju */
69   /* jl linked list for pivot row -- linked list for col index */
70   PetscCall(PetscMalloc2(mbs, &jl, mbs, &q));
71   for (i = 0; i < mbs; i++) {
72     jl[i] = mbs;
73     q[i]  = 0;
74   }
75 
76   /* for each row k */
77   for (k = 0; k < mbs; k++) {
78     for (i = 0; i < mbs; i++) q[i] = 0; /* to be removed! */
79     nzk  = 0;                           /* num. of nz blocks in k-th block row with diagonal block excluded */
80     q[k] = mbs;
81     /* initialize nonzero structure of k-th row to row rip[k] of A */
82     jmin = ai[rip[k]] + 1; /* exclude diag[k] */
83     jmax = ai[rip[k] + 1];
84     for (j = jmin; j < jmax; j++) {
85       vj = rip[aj[j]]; /* col. value */
86       if (vj > k) {
87         qm = k;
88         do {
89           m  = qm;
90           qm = q[m];
91         } while (qm < vj);
92         PetscCheck(qm != vj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Duplicate entry in A");
93         nzk++;
94         q[m]  = vj;
95         q[vj] = qm;
96       } /* if (vj > k) */
97     }   /* for (j=jmin; j<jmax; j++) */
98 
99     /* modify nonzero structure of k-th row by computing fill-in
100        for each row i to be merged in */
101     prow = k;
102     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
103 
104     while (prow < k) {
105       /* merge row prow into k-th row */
106       jmin = iu[prow] + 1;
107       jmax = iu[prow + 1];
108       qm   = k;
109       for (j = jmin; j < jmax; j++) {
110         vj = ju[j];
111         do {
112           m  = qm;
113           qm = q[m];
114         } while (qm < vj);
115         if (qm != vj) {
116           nzk++;
117           q[m]  = vj;
118           q[vj] = qm;
119           qm    = vj;
120         }
121       }
122       prow = jl[prow]; /* next pivot row */
123     }
124 
125     /* add k to row list for first nonzero element in k-th row */
126     if (nzk > 0) {
127       i     = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
128       jl[k] = jl[i];
129       jl[i] = k;
130     }
131     iu[k + 1] = iu[k] + nzk;
132 
133     /* allocate more space to ju if needed */
134     if (iu[k + 1] > umax) {
135       /* estimate how much additional space we will need */
136       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
137       /* just double the memory each time */
138       maxadd = umax;
139       if (maxadd < nzk) maxadd = (mbs - k) * (nzk + 1) / 2;
140       umax += maxadd;
141 
142       /* allocate a longer ju */
143       PetscCall(PetscMalloc1(umax, &jutmp));
144       PetscCall(PetscArraycpy(jutmp, ju, iu[k]));
145       PetscCall(PetscFree(ju));
146       ju = jutmp;
147       reallocs++; /* count how many times we realloc */
148     }
149 
150     /* save nonzero structure of k-th row in ju */
151     i = k;
152     while (nzk--) {
153       i           = q[i];
154       ju[juidx++] = i;
155     }
156   }
157 
158 #if defined(PETSC_USE_INFO)
159   if (ai[mbs] != 0) {
160     PetscReal af = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
161     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
162     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
163     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
164     PetscCall(PetscInfo(A, "for best performance.\n"));
165   } else {
166     PetscCall(PetscInfo(A, "Empty matrix\n"));
167   }
168 #endif
169 
170   PetscCall(ISRestoreIndices(perm, &rip));
171   PetscCall(PetscFree2(jl, q));
172 
173   /* put together the new matrix */
174   PetscCall(MatSeqSBAIJSetPreallocation(F, bs, MAT_SKIP_ALLOCATION, NULL));
175 
176   b               = (Mat_SeqSBAIJ *)(F)->data;
177   b->singlemalloc = PETSC_FALSE;
178   b->free_a       = PETSC_TRUE;
179   b->free_ij      = PETSC_TRUE;
180 
181   PetscCall(PetscMalloc1((iu[mbs] + 1) * bs2, &b->a));
182   b->j    = ju;
183   b->i    = iu;
184   b->diag = NULL;
185   b->ilen = NULL;
186   b->imax = NULL;
187   b->row  = perm;
188 
189   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
190 
191   PetscCall(PetscObjectReference((PetscObject)perm));
192 
193   b->icol = perm;
194   PetscCall(PetscObjectReference((PetscObject)perm));
195   PetscCall(PetscMalloc1(bs * mbs + bs, &b->solve_work));
196   /* In b structure:  Free imax, ilen, old a, old j.
197      Allocate idnew, solve_work, new a, new j */
198   b->maxnz = b->nz = iu[mbs];
199 
200   (F)->info.factor_mallocs   = reallocs;
201   (F)->info.fill_ratio_given = f;
202   if (ai[mbs] != 0) {
203     (F)->info.fill_ratio_needed = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
204   } else {
205     (F)->info.fill_ratio_needed = 0.0;
206   }
207   PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(F, perm_identity));
208   PetscFunctionReturn(PETSC_SUCCESS);
209 }
210 /*
211     Symbolic U^T*D*U factorization for SBAIJ format.
212     See MatICCFactorSymbolic_SeqAIJ() for description of its data structure.
213 */
214 #include <petscbt.h>
215 #include <../src/mat/utils/freespace.h>
216 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
217 {
218   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
219   Mat_SeqSBAIJ      *b;
220   PetscBool          perm_identity, missing;
221   PetscReal          fill = info->fill;
222   const PetscInt    *rip, *ai = a->i, *aj = a->j;
223   PetscInt           i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow;
224   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
225   PetscInt           nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
226   PetscFreeSpaceList free_space = NULL, current_space = NULL;
227   PetscBT            lnkbt;
228 
229   PetscFunctionBegin;
230   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
231   PetscCall(MatMissingDiagonal(A, &missing, &i));
232   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
233   if (bs > 1) {
234     PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact, A, perm, info));
235     PetscFunctionReturn(PETSC_SUCCESS);
236   }
237 
238   /* check whether perm is the identity mapping */
239   PetscCall(ISIdentity(perm, &perm_identity));
240   PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
241   a->permute = PETSC_FALSE;
242   PetscCall(ISGetIndices(perm, &rip));
243 
244   /* initialization */
245   PetscCall(PetscMalloc1(mbs + 1, &ui));
246   PetscCall(PetscMalloc1(mbs + 1, &udiag));
247   ui[0] = 0;
248 
249   /* jl: linked list for storing indices of the pivot rows
250      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
251   PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
252   for (i = 0; i < mbs; i++) {
253     jl[i] = mbs;
254     il[i] = 0;
255   }
256 
257   /* create and initialize a linked list for storing column indices of the active row k */
258   nlnk = mbs + 1;
259   PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));
260 
261   /* initial FreeSpace size is fill*(ai[mbs]+1) */
262   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
263   current_space = free_space;
264 
265   for (k = 0; k < mbs; k++) { /* for each active row k */
266     /* initialize lnk by the column indices of row rip[k] of A */
267     nzk   = 0;
268     ncols = ai[k + 1] - ai[k];
269     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row %" PetscInt_FMT " in matrix ", k);
270     for (j = 0; j < ncols; j++) {
271       i       = *(aj + ai[k] + j);
272       cols[j] = i;
273     }
274     PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
275     nzk += nlnk;
276 
277     /* update lnk by computing fill-in for each pivot row to be merged in */
278     prow = jl[k]; /* 1st pivot row */
279 
280     while (prow < k) {
281       nextprow = jl[prow];
282       /* merge prow into k-th row */
283       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
284       jmax   = ui[prow + 1];
285       ncols  = jmax - jmin;
286       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
287       PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
288       nzk += nlnk;
289 
290       /* update il and jl for prow */
291       if (jmin < jmax) {
292         il[prow] = jmin;
293         j        = *uj_ptr;
294         jl[prow] = jl[j];
295         jl[j]    = prow;
296       }
297       prow = nextprow;
298     }
299 
300     /* if free space is not available, make more free space */
301     if (current_space->local_remaining < nzk) {
302       i = mbs - k + 1;                                   /* num of unfactored rows */
303       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
304       PetscCall(PetscFreeSpaceGet(i, &current_space));
305       reallocs++;
306     }
307 
308     /* copy data into free space, then initialize lnk */
309     PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));
310 
311     /* add the k-th row into il and jl */
312     if (nzk > 1) {
313       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
314       jl[k] = jl[i];
315       jl[i] = k;
316       il[k] = ui[k] + 1;
317     }
318     ui_ptr[k] = current_space->array;
319 
320     current_space->array += nzk;
321     current_space->local_used += nzk;
322     current_space->local_remaining -= nzk;
323 
324     ui[k + 1] = ui[k] + nzk;
325   }
326 
327   PetscCall(ISRestoreIndices(perm, &rip));
328   PetscCall(PetscFree4(ui_ptr, il, jl, cols));
329 
330   /* destroy list of free space and other temporary array(s) */
331   PetscCall(PetscMalloc1(ui[mbs] + 1, &uj));
332   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, mbs, ui, udiag)); /* store matrix factor */
333   PetscCall(PetscLLDestroy(lnk, lnkbt));
334 
335   /* put together the new matrix in MATSEQSBAIJ format */
336   PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));
337 
338   b               = (Mat_SeqSBAIJ *)fact->data;
339   b->singlemalloc = PETSC_FALSE;
340   b->free_a       = PETSC_TRUE;
341   b->free_ij      = PETSC_TRUE;
342 
343   PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a));
344 
345   b->j         = uj;
346   b->i         = ui;
347   b->diag      = udiag;
348   b->free_diag = PETSC_TRUE;
349   b->ilen      = NULL;
350   b->imax      = NULL;
351   b->row       = perm;
352   b->icol      = perm;
353 
354   PetscCall(PetscObjectReference((PetscObject)perm));
355   PetscCall(PetscObjectReference((PetscObject)perm));
356 
357   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
358 
359   PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
360 
361   b->maxnz = b->nz = ui[mbs];
362 
363   fact->info.factor_mallocs   = reallocs;
364   fact->info.fill_ratio_given = fill;
365   if (ai[mbs] != 0) {
366     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
367   } else {
368     fact->info.fill_ratio_needed = 0.0;
369   }
370 #if defined(PETSC_USE_INFO)
371   if (ai[mbs] != 0) {
372     PetscReal af = fact->info.fill_ratio_needed;
373     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
374     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
375     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
376   } else {
377     PetscCall(PetscInfo(A, "Empty matrix\n"));
378   }
379 #endif
380   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
381   PetscFunctionReturn(PETSC_SUCCESS);
382 }
383 
384 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
385 {
386   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
387   Mat_SeqSBAIJ      *b;
388   PetscBool          perm_identity, missing;
389   PetscReal          fill = info->fill;
390   const PetscInt    *rip, *ai, *aj;
391   PetscInt           i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow, d;
392   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
393   PetscInt           nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr;
394   PetscFreeSpaceList free_space = NULL, current_space = NULL;
395   PetscBT            lnkbt;
396 
397   PetscFunctionBegin;
398   PetscCall(MatMissingDiagonal(A, &missing, &d));
399   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
400 
401   /*
402    This code originally uses Modified Sparse Row (MSR) storage
403    (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice!
404    Then it is rewritten so the factor B takes seqsbaij format. However the associated
405    MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity,
406    thus the original code in MSR format is still used for these cases.
407    The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever
408    MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
409   */
410   if (bs > 1) {
411     PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact, A, perm, info));
412     PetscFunctionReturn(PETSC_SUCCESS);
413   }
414 
415   /* check whether perm is the identity mapping */
416   PetscCall(ISIdentity(perm, &perm_identity));
417   PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
418   a->permute = PETSC_FALSE;
419   ai         = a->i;
420   aj         = a->j;
421   PetscCall(ISGetIndices(perm, &rip));
422 
423   /* initialization */
424   PetscCall(PetscMalloc1(mbs + 1, &ui));
425   ui[0] = 0;
426 
427   /* jl: linked list for storing indices of the pivot rows
428      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
429   PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
430   for (i = 0; i < mbs; i++) {
431     jl[i] = mbs;
432     il[i] = 0;
433   }
434 
435   /* create and initialize a linked list for storing column indices of the active row k */
436   nlnk = mbs + 1;
437   PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));
438 
439   /* initial FreeSpace size is fill*(ai[mbs]+1) */
440   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
441   current_space = free_space;
442 
443   for (k = 0; k < mbs; k++) { /* for each active row k */
444     /* initialize lnk by the column indices of row rip[k] of A */
445     nzk   = 0;
446     ncols = ai[rip[k] + 1] - ai[rip[k]];
447     for (j = 0; j < ncols; j++) {
448       i       = *(aj + ai[rip[k]] + j);
449       cols[j] = rip[i];
450     }
451     PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
452     nzk += nlnk;
453 
454     /* update lnk by computing fill-in for each pivot row to be merged in */
455     prow = jl[k]; /* 1st pivot row */
456 
457     while (prow < k) {
458       nextprow = jl[prow];
459       /* merge prow into k-th row */
460       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
461       jmax   = ui[prow + 1];
462       ncols  = jmax - jmin;
463       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
464       PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
465       nzk += nlnk;
466 
467       /* update il and jl for prow */
468       if (jmin < jmax) {
469         il[prow] = jmin;
470 
471         j        = *uj_ptr;
472         jl[prow] = jl[j];
473         jl[j]    = prow;
474       }
475       prow = nextprow;
476     }
477 
478     /* if free space is not available, make more free space */
479     if (current_space->local_remaining < nzk) {
480       i = mbs - k + 1;                                                            /* num of unfactored rows */
481       i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
482       PetscCall(PetscFreeSpaceGet(i, &current_space));
483       reallocs++;
484     }
485 
486     /* copy data into free space, then initialize lnk */
487     PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));
488 
489     /* add the k-th row into il and jl */
490     if (nzk - 1 > 0) {
491       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
492       jl[k] = jl[i];
493       jl[i] = k;
494       il[k] = ui[k] + 1;
495     }
496     ui_ptr[k] = current_space->array;
497 
498     current_space->array += nzk;
499     current_space->local_used += nzk;
500     current_space->local_remaining -= nzk;
501 
502     ui[k + 1] = ui[k] + nzk;
503   }
504 
505   PetscCall(ISRestoreIndices(perm, &rip));
506   PetscCall(PetscFree4(ui_ptr, il, jl, cols));
507 
508   /* destroy list of free space and other temporary array(s) */
509   PetscCall(PetscMalloc1(ui[mbs] + 1, &uj));
510   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
511   PetscCall(PetscLLDestroy(lnk, lnkbt));
512 
513   /* put together the new matrix in MATSEQSBAIJ format */
514   PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));
515 
516   b               = (Mat_SeqSBAIJ *)fact->data;
517   b->singlemalloc = PETSC_FALSE;
518   b->free_a       = PETSC_TRUE;
519   b->free_ij      = PETSC_TRUE;
520 
521   PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a));
522 
523   b->j    = uj;
524   b->i    = ui;
525   b->diag = NULL;
526   b->ilen = NULL;
527   b->imax = NULL;
528   b->row  = perm;
529 
530   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
531 
532   PetscCall(PetscObjectReference((PetscObject)perm));
533   b->icol = perm;
534   PetscCall(PetscObjectReference((PetscObject)perm));
535   PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
536   b->maxnz = b->nz = ui[mbs];
537 
538   fact->info.factor_mallocs   = reallocs;
539   fact->info.fill_ratio_given = fill;
540   if (ai[mbs] != 0) {
541     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
542   } else {
543     fact->info.fill_ratio_needed = 0.0;
544   }
545 #if defined(PETSC_USE_INFO)
546   if (ai[mbs] != 0) {
547     PetscReal af = fact->info.fill_ratio_needed;
548     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
549     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
550     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
551   } else {
552     PetscCall(PetscInfo(A, "Empty matrix\n"));
553   }
554 #endif
555   PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(fact, perm_identity));
556   PetscFunctionReturn(PETSC_SUCCESS);
557 }
558 
559 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C, Mat A, const MatFactorInfo *info)
560 {
561   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
562   IS              perm = b->row;
563   const PetscInt *ai, *aj, *perm_ptr, mbs = a->mbs, *bi = b->i, *bj = b->j;
564   PetscInt        i, j;
565   PetscInt       *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
566   PetscInt        bs = A->rmap->bs, bs2 = a->bs2;
567   MatScalar      *ba = b->a, *aa, *ap, *dk, *uik;
568   MatScalar      *u, *diag, *rtmp, *rtmp_ptr;
569   MatScalar      *work;
570   PetscInt       *pivots;
571   PetscBool       allowzeropivot, zeropivotdetected;
572 
573   PetscFunctionBegin;
574   /* initialization */
575   PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
576   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
577   allowzeropivot = PetscNot(A->erroriffailure);
578 
579   il[0] = 0;
580   for (i = 0; i < mbs; i++) jl[i] = mbs;
581 
582   PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
583   PetscCall(PetscMalloc1(bs, &pivots));
584 
585   PetscCall(ISGetIndices(perm, &perm_ptr));
586 
587   /* check permutation */
588   if (!a->permute) {
589     ai = a->i;
590     aj = a->j;
591     aa = a->a;
592   } else {
593     ai = a->inew;
594     aj = a->jnew;
595     PetscCall(PetscMalloc1(bs2 * ai[mbs], &aa));
596     PetscCall(PetscArraycpy(aa, a->a, bs2 * ai[mbs]));
597     PetscCall(PetscMalloc1(ai[mbs], &a2anew));
598     PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));
599 
600     for (i = 0; i < mbs; i++) {
601       jmin = ai[i];
602       jmax = ai[i + 1];
603       for (j = jmin; j < jmax; j++) {
604         while (a2anew[j] != j) {
605           k         = a2anew[j];
606           a2anew[j] = a2anew[k];
607           a2anew[k] = k;
608           for (k1 = 0; k1 < bs2; k1++) {
609             dk[k1]           = aa[k * bs2 + k1];
610             aa[k * bs2 + k1] = aa[j * bs2 + k1];
611             aa[j * bs2 + k1] = dk[k1];
612           }
613         }
614         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
615         if (i > aj[j]) {
616           ap = aa + j * bs2;                       /* ptr to the beginning of j-th block of aa */
617           for (k = 0; k < bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
618           for (k = 0; k < bs; k++) {               /* j-th block of aa <- dk^T */
619             for (k1 = 0; k1 < bs; k1++) *ap++ = dk[k + bs * k1];
620           }
621         }
622       }
623     }
624     PetscCall(PetscFree(a2anew));
625   }
626 
627   /* for each row k */
628   for (k = 0; k < mbs; k++) {
629     /*initialize k-th row with elements nonzero in row perm(k) of A */
630     jmin = ai[perm_ptr[k]];
631     jmax = ai[perm_ptr[k] + 1];
632 
633     ap = aa + jmin * bs2;
634     for (j = jmin; j < jmax; j++) {
635       vj       = perm_ptr[aj[j]]; /* block col. index */
636       rtmp_ptr = rtmp + vj * bs2;
637       for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
638     }
639 
640     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
641     PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
642     i = jl[k]; /* first row to be added to k_th row  */
643 
644     while (i < k) {
645       nexti = jl[i]; /* next row to be added to k_th row */
646 
647       /* compute multiplier */
648       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
649 
650       /* uik = -inv(Di)*U_bar(i,k) */
651       diag = ba + i * bs2;
652       u    = ba + ili * bs2;
653       PetscCall(PetscArrayzero(uik, bs2));
654       PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);
655 
656       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
657       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
658       PetscCall(PetscLogFlops(4.0 * bs * bs2));
659 
660       /* update -U(i,k) */
661       PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));
662 
663       /* add multiple of row i to k-th row ... */
664       jmin = ili + 1;
665       jmax = bi[i + 1];
666       if (jmin < jmax) {
667         for (j = jmin; j < jmax; j++) {
668           /* rtmp += -U(i,k)^T * U_bar(i,j) */
669           rtmp_ptr = rtmp + bj[j] * bs2;
670           u        = ba + j * bs2;
671           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
672         }
673         PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));
674 
675         /* ... add i to row list for next nonzero entry */
676         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
677         j     = bj[jmin];
678         jl[i] = jl[j];
679         jl[j] = i; /* update jl */
680       }
681       i = nexti;
682     }
683 
684     /* save nonzero entries in k-th row of U ... */
685 
686     /* invert diagonal block */
687     diag = ba + k * bs2;
688     PetscCall(PetscArraycpy(diag, dk, bs2));
689 
690     PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
691     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
692 
693     jmin = bi[k];
694     jmax = bi[k + 1];
695     if (jmin < jmax) {
696       for (j = jmin; j < jmax; j++) {
697         vj       = bj[j]; /* block col. index of U */
698         u        = ba + j * bs2;
699         rtmp_ptr = rtmp + vj * bs2;
700         for (k1 = 0; k1 < bs2; k1++) {
701           *u++        = *rtmp_ptr;
702           *rtmp_ptr++ = 0.0;
703         }
704       }
705 
706       /* ... add k to row list for first nonzero entry in k-th row */
707       il[k] = jmin;
708       i     = bj[jmin];
709       jl[k] = jl[i];
710       jl[i] = k;
711     }
712   }
713 
714   PetscCall(PetscFree(rtmp));
715   PetscCall(PetscFree2(il, jl));
716   PetscCall(PetscFree3(dk, uik, work));
717   PetscCall(PetscFree(pivots));
718   if (a->permute) PetscCall(PetscFree(aa));
719 
720   PetscCall(ISRestoreIndices(perm, &perm_ptr));
721 
722   C->ops->solve          = MatSolve_SeqSBAIJ_N_inplace;
723   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace;
724   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_inplace;
725   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_inplace;
726 
727   C->assembled    = PETSC_TRUE;
728   C->preallocated = PETSC_TRUE;
729 
730   PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
731   PetscFunctionReturn(PETSC_SUCCESS);
732 }
733 
734 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
735 {
736   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
737   PetscInt      i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
738   PetscInt     *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
739   PetscInt      bs = A->rmap->bs, bs2 = a->bs2;
740   MatScalar    *ba = b->a, *aa, *ap, *dk, *uik;
741   MatScalar    *u, *diag, *rtmp, *rtmp_ptr;
742   MatScalar    *work;
743   PetscInt     *pivots;
744   PetscBool     allowzeropivot, zeropivotdetected;
745 
746   PetscFunctionBegin;
747   PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
748   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
749   il[0] = 0;
750   for (i = 0; i < mbs; i++) jl[i] = mbs;
751 
752   PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
753   PetscCall(PetscMalloc1(bs, &pivots));
754   allowzeropivot = PetscNot(A->erroriffailure);
755 
756   ai = a->i;
757   aj = a->j;
758   aa = a->a;
759 
760   /* for each row k */
761   for (k = 0; k < mbs; k++) {
762     /*initialize k-th row with elements nonzero in row k of A */
763     jmin = ai[k];
764     jmax = ai[k + 1];
765     ap   = aa + jmin * bs2;
766     for (j = jmin; j < jmax; j++) {
767       vj       = aj[j]; /* block col. index */
768       rtmp_ptr = rtmp + vj * bs2;
769       for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
770     }
771 
772     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
773     PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
774     i = jl[k]; /* first row to be added to k_th row  */
775 
776     while (i < k) {
777       nexti = jl[i]; /* next row to be added to k_th row */
778 
779       /* compute multiplier */
780       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
781 
782       /* uik = -inv(Di)*U_bar(i,k) */
783       diag = ba + i * bs2;
784       u    = ba + ili * bs2;
785       PetscCall(PetscArrayzero(uik, bs2));
786       PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);
787 
788       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
789       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
790       PetscCall(PetscLogFlops(2.0 * bs * bs2));
791 
792       /* update -U(i,k) */
793       PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));
794 
795       /* add multiple of row i to k-th row ... */
796       jmin = ili + 1;
797       jmax = bi[i + 1];
798       if (jmin < jmax) {
799         for (j = jmin; j < jmax; j++) {
800           /* rtmp += -U(i,k)^T * U_bar(i,j) */
801           rtmp_ptr = rtmp + bj[j] * bs2;
802           u        = ba + j * bs2;
803           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
804         }
805         PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));
806 
807         /* ... add i to row list for next nonzero entry */
808         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
809         j     = bj[jmin];
810         jl[i] = jl[j];
811         jl[j] = i; /* update jl */
812       }
813       i = nexti;
814     }
815 
816     /* save nonzero entries in k-th row of U ... */
817 
818     /* invert diagonal block */
819     diag = ba + k * bs2;
820     PetscCall(PetscArraycpy(diag, dk, bs2));
821 
822     PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
823     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
824 
825     jmin = bi[k];
826     jmax = bi[k + 1];
827     if (jmin < jmax) {
828       for (j = jmin; j < jmax; j++) {
829         vj       = bj[j]; /* block col. index of U */
830         u        = ba + j * bs2;
831         rtmp_ptr = rtmp + vj * bs2;
832         for (k1 = 0; k1 < bs2; k1++) {
833           *u++        = *rtmp_ptr;
834           *rtmp_ptr++ = 0.0;
835         }
836       }
837 
838       /* ... add k to row list for first nonzero entry in k-th row */
839       il[k] = jmin;
840       i     = bj[jmin];
841       jl[k] = jl[i];
842       jl[i] = k;
843     }
844   }
845 
846   PetscCall(PetscFree(rtmp));
847   PetscCall(PetscFree2(il, jl));
848   PetscCall(PetscFree3(dk, uik, work));
849   PetscCall(PetscFree(pivots));
850 
851   C->ops->solve          = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
852   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
853   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
854   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
855   C->assembled           = PETSC_TRUE;
856   C->preallocated        = PETSC_TRUE;
857 
858   PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
859   PetscFunctionReturn(PETSC_SUCCESS);
860 }
861 
862 /*
863     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
864     Version for blocks 2 by 2.
865 */
866 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C, Mat A, const MatFactorInfo *info)
867 {
868   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
869   IS              perm = b->row;
870   const PetscInt *ai, *aj, *perm_ptr;
871   PetscInt        i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
872   PetscInt       *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
873   MatScalar      *ba = b->a, *aa, *ap;
874   MatScalar      *u, *diag, *rtmp, *rtmp_ptr, dk[4], uik[4];
875   PetscReal       shift = info->shiftamount;
876   PetscBool       allowzeropivot, zeropivotdetected;
877 
878   PetscFunctionBegin;
879   allowzeropivot = PetscNot(A->erroriffailure);
880 
881   /* initialization */
882   /* il and jl record the first nonzero element in each row of the accessing
883      window U(0:k, k:mbs-1).
884      jl:    list of rows to be added to uneliminated rows
885             i>= k: jl(i) is the first row to be added to row i
886             i<  k: jl(i) is the row following row i in some list of rows
887             jl(i) = mbs indicates the end of a list
888      il(i): points to the first nonzero element in columns k,...,mbs-1 of
889             row i of U */
890   PetscCall(PetscCalloc1(4 * mbs, &rtmp));
891   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
892   il[0] = 0;
893   for (i = 0; i < mbs; i++) jl[i] = mbs;
894 
895   PetscCall(ISGetIndices(perm, &perm_ptr));
896 
897   /* check permutation */
898   if (!a->permute) {
899     ai = a->i;
900     aj = a->j;
901     aa = a->a;
902   } else {
903     ai = a->inew;
904     aj = a->jnew;
905     PetscCall(PetscMalloc1(4 * ai[mbs], &aa));
906     PetscCall(PetscArraycpy(aa, a->a, 4 * ai[mbs]));
907     PetscCall(PetscMalloc1(ai[mbs], &a2anew));
908     PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));
909 
910     for (i = 0; i < mbs; i++) {
911       jmin = ai[i];
912       jmax = ai[i + 1];
913       for (j = jmin; j < jmax; j++) {
914         while (a2anew[j] != j) {
915           k         = a2anew[j];
916           a2anew[j] = a2anew[k];
917           a2anew[k] = k;
918           for (k1 = 0; k1 < 4; k1++) {
919             dk[k1]         = aa[k * 4 + k1];
920             aa[k * 4 + k1] = aa[j * 4 + k1];
921             aa[j * 4 + k1] = dk[k1];
922           }
923         }
924         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
925         if (i > aj[j]) {
926           ap    = aa + j * 4; /* ptr to the beginning of the block */
927           dk[1] = ap[1];      /* swap ap[1] and ap[2] */
928           ap[1] = ap[2];
929           ap[2] = dk[1];
930         }
931       }
932     }
933     PetscCall(PetscFree(a2anew));
934   }
935 
936   /* for each row k */
937   for (k = 0; k < mbs; k++) {
938     /*initialize k-th row with elements nonzero in row perm(k) of A */
939     jmin = ai[perm_ptr[k]];
940     jmax = ai[perm_ptr[k] + 1];
941     ap   = aa + jmin * 4;
942     for (j = jmin; j < jmax; j++) {
943       vj       = perm_ptr[aj[j]]; /* block col. index */
944       rtmp_ptr = rtmp + vj * 4;
945       for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
946     }
947 
948     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
949     PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
950     i = jl[k]; /* first row to be added to k_th row  */
951 
952     while (i < k) {
953       nexti = jl[i]; /* next row to be added to k_th row */
954 
955       /* compute multiplier */
956       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
957 
958       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
959       diag   = ba + i * 4;
960       u      = ba + ili * 4;
961       uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
962       uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
963       uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
964       uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);
965 
966       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
967       dk[0] += uik[0] * u[0] + uik[1] * u[1];
968       dk[1] += uik[2] * u[0] + uik[3] * u[1];
969       dk[2] += uik[0] * u[2] + uik[1] * u[3];
970       dk[3] += uik[2] * u[2] + uik[3] * u[3];
971 
972       PetscCall(PetscLogFlops(16.0 * 2.0));
973 
974       /* update -U(i,k): ba[ili] = uik */
975       PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));
976 
977       /* add multiple of row i to k-th row ... */
978       jmin = ili + 1;
979       jmax = bi[i + 1];
980       if (jmin < jmax) {
981         for (j = jmin; j < jmax; j++) {
982           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
983           rtmp_ptr = rtmp + bj[j] * 4;
984           u        = ba + j * 4;
985           rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
986           rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
987           rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
988           rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
989         }
990         PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));
991 
992         /* ... add i to row list for next nonzero entry */
993         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
994         j     = bj[jmin];
995         jl[i] = jl[j];
996         jl[j] = i; /* update jl */
997       }
998       i = nexti;
999     }
1000 
1001     /* save nonzero entries in k-th row of U ... */
1002 
1003     /* invert diagonal block */
1004     diag = ba + k * 4;
1005     PetscCall(PetscArraycpy(diag, dk, 4));
1006     PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1007     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1008 
1009     jmin = bi[k];
1010     jmax = bi[k + 1];
1011     if (jmin < jmax) {
1012       for (j = jmin; j < jmax; j++) {
1013         vj       = bj[j]; /* block col. index of U */
1014         u        = ba + j * 4;
1015         rtmp_ptr = rtmp + vj * 4;
1016         for (k1 = 0; k1 < 4; k1++) {
1017           *u++        = *rtmp_ptr;
1018           *rtmp_ptr++ = 0.0;
1019         }
1020       }
1021 
1022       /* ... add k to row list for first nonzero entry in k-th row */
1023       il[k] = jmin;
1024       i     = bj[jmin];
1025       jl[k] = jl[i];
1026       jl[i] = k;
1027     }
1028   }
1029 
1030   PetscCall(PetscFree(rtmp));
1031   PetscCall(PetscFree2(il, jl));
1032   if (a->permute) PetscCall(PetscFree(aa));
1033   PetscCall(ISRestoreIndices(perm, &perm_ptr));
1034 
1035   C->ops->solve          = MatSolve_SeqSBAIJ_2_inplace;
1036   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace;
1037   C->assembled           = PETSC_TRUE;
1038   C->preallocated        = PETSC_TRUE;
1039 
1040   PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1041   PetscFunctionReturn(PETSC_SUCCESS);
1042 }
1043 
1044 /*
1045       Version for when blocks are 2 by 2 Using natural ordering
1046 */
1047 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
1048 {
1049   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1050   PetscInt      i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
1051   PetscInt     *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
1052   MatScalar    *ba = b->a, *aa, *ap, dk[8], uik[8];
1053   MatScalar    *u, *diag, *rtmp, *rtmp_ptr;
1054   PetscReal     shift = info->shiftamount;
1055   PetscBool     allowzeropivot, zeropivotdetected;
1056 
1057   PetscFunctionBegin;
1058   allowzeropivot = PetscNot(A->erroriffailure);
1059 
1060   /* initialization */
1061   /* il and jl record the first nonzero element in each row of the accessing
1062      window U(0:k, k:mbs-1).
1063      jl:    list of rows to be added to uneliminated rows
1064             i>= k: jl(i) is the first row to be added to row i
1065             i<  k: jl(i) is the row following row i in some list of rows
1066             jl(i) = mbs indicates the end of a list
1067      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1068             row i of U */
1069   PetscCall(PetscCalloc1(4 * mbs, &rtmp));
1070   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
1071   il[0] = 0;
1072   for (i = 0; i < mbs; i++) jl[i] = mbs;
1073 
1074   ai = a->i;
1075   aj = a->j;
1076   aa = a->a;
1077 
1078   /* for each row k */
1079   for (k = 0; k < mbs; k++) {
1080     /*initialize k-th row with elements nonzero in row k of A */
1081     jmin = ai[k];
1082     jmax = ai[k + 1];
1083     ap   = aa + jmin * 4;
1084     for (j = jmin; j < jmax; j++) {
1085       vj       = aj[j]; /* block col. index */
1086       rtmp_ptr = rtmp + vj * 4;
1087       for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
1088     }
1089 
1090     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1091     PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
1092     i = jl[k]; /* first row to be added to k_th row  */
1093 
1094     while (i < k) {
1095       nexti = jl[i]; /* next row to be added to k_th row */
1096 
1097       /* compute multiplier */
1098       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1099 
1100       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1101       diag   = ba + i * 4;
1102       u      = ba + ili * 4;
1103       uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
1104       uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
1105       uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
1106       uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);
1107 
1108       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1109       dk[0] += uik[0] * u[0] + uik[1] * u[1];
1110       dk[1] += uik[2] * u[0] + uik[3] * u[1];
1111       dk[2] += uik[0] * u[2] + uik[1] * u[3];
1112       dk[3] += uik[2] * u[2] + uik[3] * u[3];
1113 
1114       PetscCall(PetscLogFlops(16.0 * 2.0));
1115 
1116       /* update -U(i,k): ba[ili] = uik */
1117       PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));
1118 
1119       /* add multiple of row i to k-th row ... */
1120       jmin = ili + 1;
1121       jmax = bi[i + 1];
1122       if (jmin < jmax) {
1123         for (j = jmin; j < jmax; j++) {
1124           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1125           rtmp_ptr = rtmp + bj[j] * 4;
1126           u        = ba + j * 4;
1127           rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
1128           rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
1129           rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
1130           rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
1131         }
1132         PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));
1133 
1134         /* ... add i to row list for next nonzero entry */
1135         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1136         j     = bj[jmin];
1137         jl[i] = jl[j];
1138         jl[j] = i; /* update jl */
1139       }
1140       i = nexti;
1141     }
1142 
1143     /* save nonzero entries in k-th row of U ... */
1144 
1145     /* invert diagonal block */
1146     diag = ba + k * 4;
1147     PetscCall(PetscArraycpy(diag, dk, 4));
1148     PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1149     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1150 
1151     jmin = bi[k];
1152     jmax = bi[k + 1];
1153     if (jmin < jmax) {
1154       for (j = jmin; j < jmax; j++) {
1155         vj       = bj[j]; /* block col. index of U */
1156         u        = ba + j * 4;
1157         rtmp_ptr = rtmp + vj * 4;
1158         for (k1 = 0; k1 < 4; k1++) {
1159           *u++        = *rtmp_ptr;
1160           *rtmp_ptr++ = 0.0;
1161         }
1162       }
1163 
1164       /* ... add k to row list for first nonzero entry in k-th row */
1165       il[k] = jmin;
1166       i     = bj[jmin];
1167       jl[k] = jl[i];
1168       jl[i] = k;
1169     }
1170   }
1171 
1172   PetscCall(PetscFree(rtmp));
1173   PetscCall(PetscFree2(il, jl));
1174 
1175   C->ops->solve          = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1176   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1177   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1178   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1179   C->assembled           = PETSC_TRUE;
1180   C->preallocated        = PETSC_TRUE;
1181 
1182   PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1183   PetscFunctionReturn(PETSC_SUCCESS);
1184 }
1185 
1186 /*
1187     Numeric U^T*D*U factorization for SBAIJ format.
1188     Version for blocks are 1 by 1.
1189 */
1190 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info)
1191 {
1192   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1193   IS              ip = b->row;
1194   const PetscInt *ai, *aj, *rip;
1195   PetscInt       *a2anew, i, j, mbs = a->mbs, *bi = b->i, *bj = b->j, *bcol;
1196   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
1197   MatScalar      *rtmp, *ba = b->a, *bval, *aa, dk, uikdi;
1198   PetscReal       rs;
1199   FactorShiftCtx  sctx;
1200 
1201   PetscFunctionBegin;
1202   /* MatPivotSetUp(): initialize shift context sctx */
1203   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1204 
1205   PetscCall(ISGetIndices(ip, &rip));
1206   if (!a->permute) {
1207     ai = a->i;
1208     aj = a->j;
1209     aa = a->a;
1210   } else {
1211     ai = a->inew;
1212     aj = a->jnew;
1213     nz = ai[mbs];
1214     PetscCall(PetscMalloc1(nz, &aa));
1215     a2anew = a->a2anew;
1216     bval   = a->a;
1217     for (j = 0; j < nz; j++) aa[a2anew[j]] = *(bval++);
1218   }
1219 
1220   /* initialization */
1221   /* il and jl record the first nonzero element in each row of the accessing
1222      window U(0:k, k:mbs-1).
1223      jl:    list of rows to be added to uneliminated rows
1224             i>= k: jl(i) is the first row to be added to row i
1225             i<  k: jl(i) is the row following row i in some list of rows
1226             jl(i) = mbs indicates the end of a list
1227      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1228             row i of U */
1229   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
1230 
1231   do {
1232     sctx.newshift = PETSC_FALSE;
1233     il[0]         = 0;
1234     for (i = 0; i < mbs; i++) {
1235       rtmp[i] = 0.0;
1236       jl[i]   = mbs;
1237     }
1238 
1239     for (k = 0; k < mbs; k++) {
1240       /*initialize k-th row by the perm[k]-th row of A */
1241       jmin = ai[rip[k]];
1242       jmax = ai[rip[k] + 1];
1243       bval = ba + bi[k];
1244       for (j = jmin; j < jmax; j++) {
1245         col       = rip[aj[j]];
1246         rtmp[col] = aa[j];
1247         *bval++   = 0.0; /* for in-place factorization */
1248       }
1249 
1250       /* shift the diagonal of the matrix */
1251       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1252 
1253       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1254       dk = rtmp[k];
1255       i  = jl[k]; /* first row to be added to k_th row  */
1256 
1257       while (i < k) {
1258         nexti = jl[i]; /* next row to be added to k_th row */
1259 
1260         /* compute multiplier, update diag(k) and U(i,k) */
1261         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
1262         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
1263         dk += uikdi * ba[ili];
1264         ba[ili] = uikdi; /* -U(i,k) */
1265 
1266         /* add multiple of row i to k-th row */
1267         jmin = ili + 1;
1268         jmax = bi[i + 1];
1269         if (jmin < jmax) {
1270           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1271           PetscCall(PetscLogFlops(2.0 * (jmax - jmin)));
1272 
1273           /* update il and jl for row i */
1274           il[i] = jmin;
1275           j     = bj[jmin];
1276           jl[i] = jl[j];
1277           jl[j] = i;
1278         }
1279         i = nexti;
1280       }
1281 
1282       /* shift the diagonals when zero pivot is detected */
1283       /* compute rs=sum of abs(off-diagonal) */
1284       rs   = 0.0;
1285       jmin = bi[k] + 1;
1286       nz   = bi[k + 1] - jmin;
1287       if (nz) {
1288         bcol = bj + jmin;
1289         while (nz--) {
1290           rs += PetscAbsScalar(rtmp[*bcol]);
1291           bcol++;
1292         }
1293       }
1294 
1295       sctx.rs = rs;
1296       sctx.pv = dk;
1297       PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1298       if (sctx.newshift) break; /* sctx.shift_amount is updated */
1299       dk = sctx.pv;
1300 
1301       /* copy data into U(k,:) */
1302       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
1303       jmin      = bi[k] + 1;
1304       jmax      = bi[k + 1];
1305       if (jmin < jmax) {
1306         for (j = jmin; j < jmax; j++) {
1307           col       = bj[j];
1308           ba[j]     = rtmp[col];
1309           rtmp[col] = 0.0;
1310         }
1311         /* add the k-th row into il and jl */
1312         il[k] = jmin;
1313         i     = bj[jmin];
1314         jl[k] = jl[i];
1315         jl[i] = k;
1316       }
1317     }
1318   } while (sctx.newshift);
1319   PetscCall(PetscFree3(rtmp, il, jl));
1320   if (a->permute) PetscCall(PetscFree(aa));
1321 
1322   PetscCall(ISRestoreIndices(ip, &rip));
1323 
1324   C->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
1325   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1326   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
1327   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
1328   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
1329   C->assembled           = PETSC_TRUE;
1330   C->preallocated        = PETSC_TRUE;
1331 
1332   PetscCall(PetscLogFlops(C->rmap->N));
1333   if (sctx.nshift) {
1334     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1335       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1336     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1337       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1338     }
1339   }
1340   PetscFunctionReturn(PETSC_SUCCESS);
1341 }
1342 
1343 /*
1344   Version for when blocks are 1 by 1 Using natural ordering under new datastructure
1345   Modified from MatCholeskyFactorNumeric_SeqAIJ()
1346 */
1347 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info)
1348 {
1349   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ *)A->data;
1350   Mat_SeqSBAIJ  *b = (Mat_SeqSBAIJ *)B->data;
1351   PetscInt       i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1352   PetscInt      *ai = a->i, *aj = a->j, *ajtmp;
1353   PetscInt       k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1354   MatScalar     *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
1355   FactorShiftCtx sctx;
1356   PetscReal      rs;
1357   MatScalar      d, *v;
1358 
1359   PetscFunctionBegin;
1360   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
1361 
1362   /* MatPivotSetUp(): initialize shift context sctx */
1363   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1364 
1365   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1366     sctx.shift_top = info->zeropivot;
1367 
1368     PetscCall(PetscArrayzero(rtmp, mbs));
1369 
1370     for (i = 0; i < mbs; i++) {
1371       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1372       d = (aa)[a->diag[i]];
1373       rtmp[i] += -PetscRealPart(d); /* diagonal entry */
1374       ajtmp = aj + ai[i] + 1;       /* exclude diagonal */
1375       v     = aa + ai[i] + 1;
1376       nz    = ai[i + 1] - ai[i] - 1;
1377       for (j = 0; j < nz; j++) {
1378         rtmp[i] += PetscAbsScalar(v[j]);
1379         rtmp[ajtmp[j]] += PetscAbsScalar(v[j]);
1380       }
1381       if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]);
1382     }
1383     sctx.shift_top *= 1.1;
1384     sctx.nshift_max = 5;
1385     sctx.shift_lo   = 0.;
1386     sctx.shift_hi   = 1.;
1387   }
1388 
1389   /* allocate working arrays
1390      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1391      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1392   */
1393   do {
1394     sctx.newshift = PETSC_FALSE;
1395 
1396     for (i = 0; i < mbs; i++) c2r[i] = mbs;
1397     if (mbs) il[0] = 0;
1398 
1399     for (k = 0; k < mbs; k++) {
1400       /* zero rtmp */
1401       nz    = bi[k + 1] - bi[k];
1402       bjtmp = bj + bi[k];
1403       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
1404 
1405       /* load in initial unfactored row */
1406       bval = ba + bi[k];
1407       jmin = ai[k];
1408       jmax = ai[k + 1];
1409       for (j = jmin; j < jmax; j++) {
1410         col       = aj[j];
1411         rtmp[col] = aa[j];
1412         *bval++   = 0.0; /* for in-place factorization */
1413       }
1414       /* shift the diagonal of the matrix: ZeropivotApply() */
1415       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
1416 
1417       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1418       dk = rtmp[k];
1419       i  = c2r[k]; /* first row to be added to k_th row  */
1420 
1421       while (i < k) {
1422         nexti = c2r[i]; /* next row to be added to k_th row */
1423 
1424         /* compute multiplier, update diag(k) and U(i,k) */
1425         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
1426         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
1427         dk += uikdi * ba[ili];           /* update diag[k] */
1428         ba[ili] = uikdi;                 /* -U(i,k) */
1429 
1430         /* add multiple of row i to k-th row */
1431         jmin = ili + 1;
1432         jmax = bi[i + 1];
1433         if (jmin < jmax) {
1434           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1435           /* update il and c2r for row i */
1436           il[i]  = jmin;
1437           j      = bj[jmin];
1438           c2r[i] = c2r[j];
1439           c2r[j] = i;
1440         }
1441         i = nexti;
1442       }
1443 
1444       /* copy data into U(k,:) */
1445       rs   = 0.0;
1446       jmin = bi[k];
1447       jmax = bi[k + 1] - 1;
1448       if (jmin < jmax) {
1449         for (j = jmin; j < jmax; j++) {
1450           col   = bj[j];
1451           ba[j] = rtmp[col];
1452           rs += PetscAbsScalar(ba[j]);
1453         }
1454         /* add the k-th row into il and c2r */
1455         il[k]  = jmin;
1456         i      = bj[jmin];
1457         c2r[k] = c2r[i];
1458         c2r[i] = k;
1459       }
1460 
1461       sctx.rs = rs;
1462       sctx.pv = dk;
1463       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
1464       if (sctx.newshift) break;
1465       dk = sctx.pv;
1466 
1467       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
1468     }
1469   } while (sctx.newshift);
1470 
1471   PetscCall(PetscFree3(rtmp, il, c2r));
1472 
1473   B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1474   B->ops->solves         = MatSolves_SeqSBAIJ_1;
1475   B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1476   B->ops->matsolve       = MatMatSolve_SeqSBAIJ_1_NaturalOrdering;
1477   B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1478   B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1479 
1480   B->assembled    = PETSC_TRUE;
1481   B->preallocated = PETSC_TRUE;
1482 
1483   PetscCall(PetscLogFlops(B->rmap->n));
1484 
1485   /* MatPivotView() */
1486   if (sctx.nshift) {
1487     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1488       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
1489     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1490       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1491     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1492       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
1493     }
1494   }
1495   PetscFunctionReturn(PETSC_SUCCESS);
1496 }
1497 
1498 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info)
1499 {
1500   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1501   PetscInt       i, j, mbs = a->mbs;
1502   PetscInt      *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
1503   PetscInt       k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz;
1504   MatScalar     *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval;
1505   PetscReal      rs;
1506   FactorShiftCtx sctx;
1507 
1508   PetscFunctionBegin;
1509   /* MatPivotSetUp(): initialize shift context sctx */
1510   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1511 
1512   /* initialization */
1513   /* il and jl record the first nonzero element in each row of the accessing
1514      window U(0:k, k:mbs-1).
1515      jl:    list of rows to be added to uneliminated rows
1516             i>= k: jl(i) is the first row to be added to row i
1517             i<  k: jl(i) is the row following row i in some list of rows
1518             jl(i) = mbs indicates the end of a list
1519      il(i): points to the first nonzero element in U(i,k:mbs-1)
1520   */
1521   PetscCall(PetscMalloc1(mbs, &rtmp));
1522   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
1523 
1524   do {
1525     sctx.newshift = PETSC_FALSE;
1526     il[0]         = 0;
1527     for (i = 0; i < mbs; i++) {
1528       rtmp[i] = 0.0;
1529       jl[i]   = mbs;
1530     }
1531 
1532     for (k = 0; k < mbs; k++) {
1533       /*initialize k-th row with elements nonzero in row perm(k) of A */
1534       nz   = ai[k + 1] - ai[k];
1535       acol = aj + ai[k];
1536       aval = aa + ai[k];
1537       bval = ba + bi[k];
1538       while (nz--) {
1539         rtmp[*acol++] = *aval++;
1540         *bval++       = 0.0; /* for in-place factorization */
1541       }
1542 
1543       /* shift the diagonal of the matrix */
1544       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1545 
1546       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1547       dk = rtmp[k];
1548       i  = jl[k]; /* first row to be added to k_th row  */
1549 
1550       while (i < k) {
1551         nexti = jl[i]; /* next row to be added to k_th row */
1552         /* compute multiplier, update D(k) and U(i,k) */
1553         ili   = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1554         uikdi = -ba[ili] * ba[bi[i]];
1555         dk += uikdi * ba[ili];
1556         ba[ili] = uikdi; /* -U(i,k) */
1557 
1558         /* add multiple of row i to k-th row ... */
1559         jmin = ili + 1;
1560         nz   = bi[i + 1] - jmin;
1561         if (nz > 0) {
1562           bcol = bj + jmin;
1563           bval = ba + jmin;
1564           PetscCall(PetscLogFlops(2.0 * nz));
1565           while (nz--) rtmp[*bcol++] += uikdi * (*bval++);
1566 
1567           /* update il and jl for i-th row */
1568           il[i] = jmin;
1569           j     = bj[jmin];
1570           jl[i] = jl[j];
1571           jl[j] = i;
1572         }
1573         i = nexti;
1574       }
1575 
1576       /* shift the diagonals when zero pivot is detected */
1577       /* compute rs=sum of abs(off-diagonal) */
1578       rs   = 0.0;
1579       jmin = bi[k] + 1;
1580       nz   = bi[k + 1] - jmin;
1581       if (nz) {
1582         bcol = bj + jmin;
1583         while (nz--) {
1584           rs += PetscAbsScalar(rtmp[*bcol]);
1585           bcol++;
1586         }
1587       }
1588 
1589       sctx.rs = rs;
1590       sctx.pv = dk;
1591       PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1592       if (sctx.newshift) break; /* sctx.shift_amount is updated */
1593       dk = sctx.pv;
1594 
1595       /* copy data into U(k,:) */
1596       ba[bi[k]] = 1.0 / dk;
1597       jmin      = bi[k] + 1;
1598       nz        = bi[k + 1] - jmin;
1599       if (nz) {
1600         bcol = bj + jmin;
1601         bval = ba + jmin;
1602         while (nz--) {
1603           *bval++       = rtmp[*bcol];
1604           rtmp[*bcol++] = 0.0;
1605         }
1606         /* add k-th row into il and jl */
1607         il[k] = jmin;
1608         i     = bj[jmin];
1609         jl[k] = jl[i];
1610         jl[i] = k;
1611       }
1612     } /* end of for (k = 0; k<mbs; k++) */
1613   } while (sctx.newshift);
1614   PetscCall(PetscFree(rtmp));
1615   PetscCall(PetscFree2(il, jl));
1616 
1617   C->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1618   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1619   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1620   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1621   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1622 
1623   C->assembled    = PETSC_TRUE;
1624   C->preallocated = PETSC_TRUE;
1625 
1626   PetscCall(PetscLogFlops(C->rmap->N));
1627   if (sctx.nshift) {
1628     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1629       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1630     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1631       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1632     }
1633   }
1634   PetscFunctionReturn(PETSC_SUCCESS);
1635 }
1636 
1637 PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A, IS perm, const MatFactorInfo *info)
1638 {
1639   Mat C;
1640 
1641   PetscFunctionBegin;
1642   PetscCall(MatGetFactor(A, "petsc", MAT_FACTOR_CHOLESKY, &C));
1643   PetscCall(MatCholeskyFactorSymbolic(C, A, perm, info));
1644   PetscCall(MatCholeskyFactorNumeric(C, A, info));
1645 
1646   A->ops->solve          = C->ops->solve;
1647   A->ops->solvetranspose = C->ops->solvetranspose;
1648 
1649   PetscCall(MatHeaderMerge(A, &C));
1650   PetscFunctionReturn(PETSC_SUCCESS);
1651 }
1652