xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact.c (revision d9acb416d05abeed0a33bde3a81aeb2ea0364f6a)
1 #include <../src/mat/impls/baij/seq/baij.h>
2 #include <../src/mat/impls/sbaij/seq/sbaij.h>
3 #include <petsc/private/kernels/blockinvert.h>
4 #include <petscis.h>
5 
6 PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
7 {
8   Mat_SeqSBAIJ *fact = (Mat_SeqSBAIJ *)F->data;
9   MatScalar    *dd   = fact->a;
10   PetscInt      mbs = fact->mbs, bs = F->rmap->bs, i, nneg_tmp, npos_tmp, *fi = fact->diag;
11 
12   PetscFunctionBegin;
13   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for bs: %" PetscInt_FMT " >1 yet", bs);
14 
15   nneg_tmp = 0;
16   npos_tmp = 0;
17   for (i = 0; i < mbs; i++) {
18     if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++;
19     else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++;
20     fi++;
21   }
22   if (nneg) *nneg = nneg_tmp;
23   if (npos) *npos = npos_tmp;
24   if (nzero) *nzero = mbs - nneg_tmp - npos_tmp;
25   PetscFunctionReturn(PETSC_SUCCESS);
26 }
27 
28 /*
29   Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
30   Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad.
31 */
32 static PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F, Mat A, IS perm, const MatFactorInfo *info)
33 {
34   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b;
35   const PetscInt *rip, *ai, *aj;
36   PetscInt        i, mbs = a->mbs, *jutmp, bs = A->rmap->bs, bs2 = a->bs2;
37   PetscInt        m, reallocs = 0, prow;
38   PetscInt       *jl, *q, jmin, jmax, juidx, nzk, qm, *iu, *ju, k, j, vj, umax, maxadd;
39   PetscReal       f = info->fill;
40   PetscBool       perm_identity;
41 
42   PetscFunctionBegin;
43   /* check whether perm is the identity mapping */
44   PetscCall(ISIdentity(perm, &perm_identity));
45   PetscCall(ISGetIndices(perm, &rip));
46 
47   if (perm_identity) { /* without permutation */
48     a->permute = PETSC_FALSE;
49 
50     ai = a->i;
51     aj = a->j;
52   } else { /* non-trivial permutation */
53     a->permute = PETSC_TRUE;
54 
55     PetscCall(MatReorderingSeqSBAIJ(A, perm));
56 
57     ai = a->inew;
58     aj = a->jnew;
59   }
60 
61   /* initialization */
62   PetscCall(PetscMalloc1(mbs + 1, &iu));
63   umax = (PetscInt)(f * ai[mbs] + 1);
64   umax += mbs + 1;
65   PetscCall(PetscMalloc1(umax, &ju));
66   iu[0] = mbs + 1;
67   juidx = mbs + 1; /* index for ju */
68   /* jl linked list for pivot row -- linked list for col index */
69   PetscCall(PetscMalloc2(mbs, &jl, mbs, &q));
70   for (i = 0; i < mbs; i++) {
71     jl[i] = mbs;
72     q[i]  = 0;
73   }
74 
75   /* for each row k */
76   for (k = 0; k < mbs; k++) {
77     for (i = 0; i < mbs; i++) q[i] = 0; /* to be removed! */
78     nzk  = 0;                           /* num. of nz blocks in k-th block row with diagonal block excluded */
79     q[k] = mbs;
80     /* initialize nonzero structure of k-th row to row rip[k] of A */
81     jmin = ai[rip[k]] + 1; /* exclude diag[k] */
82     jmax = ai[rip[k] + 1];
83     for (j = jmin; j < jmax; j++) {
84       vj = rip[aj[j]]; /* col. value */
85       if (vj > k) {
86         qm = k;
87         do {
88           m  = qm;
89           qm = q[m];
90         } while (qm < vj);
91         PetscCheck(qm != vj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Duplicate entry in A");
92         nzk++;
93         q[m]  = vj;
94         q[vj] = qm;
95       } /* if (vj > k) */
96     }   /* for (j=jmin; j<jmax; j++) */
97 
98     /* modify nonzero structure of k-th row by computing fill-in
99        for each row i to be merged in */
100     prow = k;
101     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
102 
103     while (prow < k) {
104       /* merge row prow into k-th row */
105       jmin = iu[prow] + 1;
106       jmax = iu[prow + 1];
107       qm   = k;
108       for (j = jmin; j < jmax; j++) {
109         vj = ju[j];
110         do {
111           m  = qm;
112           qm = q[m];
113         } while (qm < vj);
114         if (qm != vj) {
115           nzk++;
116           q[m]  = vj;
117           q[vj] = qm;
118           qm    = vj;
119         }
120       }
121       prow = jl[prow]; /* next pivot row */
122     }
123 
124     /* add k to row list for first nonzero element in k-th row */
125     if (nzk > 0) {
126       i     = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
127       jl[k] = jl[i];
128       jl[i] = k;
129     }
130     iu[k + 1] = iu[k] + nzk;
131 
132     /* allocate more space to ju if needed */
133     if (iu[k + 1] > umax) {
134       /* estimate how much additional space we will need */
135       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
136       /* just double the memory each time */
137       maxadd = umax;
138       if (maxadd < nzk) maxadd = (mbs - k) * (nzk + 1) / 2;
139       umax += maxadd;
140 
141       /* allocate a longer ju */
142       PetscCall(PetscMalloc1(umax, &jutmp));
143       PetscCall(PetscArraycpy(jutmp, ju, iu[k]));
144       PetscCall(PetscFree(ju));
145       ju = jutmp;
146       reallocs++; /* count how many times we realloc */
147     }
148 
149     /* save nonzero structure of k-th row in ju */
150     i = k;
151     while (nzk--) {
152       i           = q[i];
153       ju[juidx++] = i;
154     }
155   }
156 
157 #if defined(PETSC_USE_INFO)
158   if (ai[mbs] != 0) {
159     PetscReal af = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
160     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
161     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
162     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
163     PetscCall(PetscInfo(A, "for best performance.\n"));
164   } else {
165     PetscCall(PetscInfo(A, "Empty matrix\n"));
166   }
167 #endif
168 
169   PetscCall(ISRestoreIndices(perm, &rip));
170   PetscCall(PetscFree2(jl, q));
171 
172   /* put together the new matrix */
173   PetscCall(MatSeqSBAIJSetPreallocation(F, bs, MAT_SKIP_ALLOCATION, NULL));
174 
175   b               = (Mat_SeqSBAIJ *)(F)->data;
176   b->singlemalloc = PETSC_FALSE;
177   b->free_a       = PETSC_TRUE;
178   b->free_ij      = PETSC_TRUE;
179 
180   PetscCall(PetscMalloc1((iu[mbs] + 1) * bs2, &b->a));
181   b->j    = ju;
182   b->i    = iu;
183   b->diag = NULL;
184   b->ilen = NULL;
185   b->imax = NULL;
186   b->row  = perm;
187 
188   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
189 
190   PetscCall(PetscObjectReference((PetscObject)perm));
191 
192   b->icol = perm;
193   PetscCall(PetscObjectReference((PetscObject)perm));
194   PetscCall(PetscMalloc1(bs * mbs + bs, &b->solve_work));
195   /* In b structure:  Free imax, ilen, old a, old j.
196      Allocate idnew, solve_work, new a, new j */
197   b->maxnz = b->nz = iu[mbs];
198 
199   (F)->info.factor_mallocs   = reallocs;
200   (F)->info.fill_ratio_given = f;
201   if (ai[mbs] != 0) {
202     (F)->info.fill_ratio_needed = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
203   } else {
204     (F)->info.fill_ratio_needed = 0.0;
205   }
206   PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(F, perm_identity));
207   PetscFunctionReturn(PETSC_SUCCESS);
208 }
209 /*
210     Symbolic U^T*D*U factorization for SBAIJ format.
211     See MatICCFactorSymbolic_SeqAIJ() for description of its data structure.
212 */
213 #include <petscbt.h>
214 #include <../src/mat/utils/freespace.h>
215 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
216 {
217   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
218   Mat_SeqSBAIJ      *b;
219   PetscBool          perm_identity, missing;
220   PetscReal          fill = info->fill;
221   const PetscInt    *rip, *ai = a->i, *aj = a->j;
222   PetscInt           i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow;
223   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
224   PetscInt           nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
225   PetscFreeSpaceList free_space = NULL, current_space = NULL;
226   PetscBT            lnkbt;
227 
228   PetscFunctionBegin;
229   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
230   PetscCall(MatMissingDiagonal(A, &missing, &i));
231   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
232   if (bs > 1) {
233     PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact, A, perm, info));
234     PetscFunctionReturn(PETSC_SUCCESS);
235   }
236 
237   /* check whether perm is the identity mapping */
238   PetscCall(ISIdentity(perm, &perm_identity));
239   PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
240   a->permute = PETSC_FALSE;
241   PetscCall(ISGetIndices(perm, &rip));
242 
243   /* initialization */
244   PetscCall(PetscMalloc1(mbs + 1, &ui));
245   PetscCall(PetscMalloc1(mbs + 1, &udiag));
246   ui[0] = 0;
247 
248   /* jl: linked list for storing indices of the pivot rows
249      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
250   PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
251   for (i = 0; i < mbs; i++) {
252     jl[i] = mbs;
253     il[i] = 0;
254   }
255 
256   /* create and initialize a linked list for storing column indices of the active row k */
257   nlnk = mbs + 1;
258   PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));
259 
260   /* initial FreeSpace size is fill*(ai[mbs]+1) */
261   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
262   current_space = free_space;
263 
264   for (k = 0; k < mbs; k++) { /* for each active row k */
265     /* initialize lnk by the column indices of row rip[k] of A */
266     nzk   = 0;
267     ncols = ai[k + 1] - ai[k];
268     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row %" PetscInt_FMT " in matrix ", k);
269     for (j = 0; j < ncols; j++) {
270       i       = *(aj + ai[k] + j);
271       cols[j] = i;
272     }
273     PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
274     nzk += nlnk;
275 
276     /* update lnk by computing fill-in for each pivot row to be merged in */
277     prow = jl[k]; /* 1st pivot row */
278 
279     while (prow < k) {
280       nextprow = jl[prow];
281       /* merge prow into k-th row */
282       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
283       jmax   = ui[prow + 1];
284       ncols  = jmax - jmin;
285       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
286       PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
287       nzk += nlnk;
288 
289       /* update il and jl for prow */
290       if (jmin < jmax) {
291         il[prow] = jmin;
292         j        = *uj_ptr;
293         jl[prow] = jl[j];
294         jl[j]    = prow;
295       }
296       prow = nextprow;
297     }
298 
299     /* if free space is not available, make more free space */
300     if (current_space->local_remaining < nzk) {
301       i = mbs - k + 1;                                   /* num of unfactored rows */
302       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
303       PetscCall(PetscFreeSpaceGet(i, &current_space));
304       reallocs++;
305     }
306 
307     /* copy data into free space, then initialize lnk */
308     PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));
309 
310     /* add the k-th row into il and jl */
311     if (nzk > 1) {
312       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
313       jl[k] = jl[i];
314       jl[i] = k;
315       il[k] = ui[k] + 1;
316     }
317     ui_ptr[k] = current_space->array;
318 
319     current_space->array += nzk;
320     current_space->local_used += nzk;
321     current_space->local_remaining -= nzk;
322 
323     ui[k + 1] = ui[k] + nzk;
324   }
325 
326   PetscCall(ISRestoreIndices(perm, &rip));
327   PetscCall(PetscFree4(ui_ptr, il, jl, cols));
328 
329   /* destroy list of free space and other temporary array(s) */
330   PetscCall(PetscMalloc1(ui[mbs] + 1, &uj));
331   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, mbs, ui, udiag)); /* store matrix factor */
332   PetscCall(PetscLLDestroy(lnk, lnkbt));
333 
334   /* put together the new matrix in MATSEQSBAIJ format */
335   PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));
336 
337   b               = (Mat_SeqSBAIJ *)fact->data;
338   b->singlemalloc = PETSC_FALSE;
339   b->free_a       = PETSC_TRUE;
340   b->free_ij      = PETSC_TRUE;
341 
342   PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a));
343 
344   b->j         = uj;
345   b->i         = ui;
346   b->diag      = udiag;
347   b->free_diag = PETSC_TRUE;
348   b->ilen      = NULL;
349   b->imax      = NULL;
350   b->row       = perm;
351   b->icol      = perm;
352 
353   PetscCall(PetscObjectReference((PetscObject)perm));
354   PetscCall(PetscObjectReference((PetscObject)perm));
355 
356   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
357 
358   PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
359 
360   b->maxnz = b->nz = ui[mbs];
361 
362   fact->info.factor_mallocs   = reallocs;
363   fact->info.fill_ratio_given = fill;
364   if (ai[mbs] != 0) {
365     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
366   } else {
367     fact->info.fill_ratio_needed = 0.0;
368   }
369 #if defined(PETSC_USE_INFO)
370   if (ai[mbs] != 0) {
371     PetscReal af = fact->info.fill_ratio_needed;
372     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
373     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
374     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
375   } else {
376     PetscCall(PetscInfo(A, "Empty matrix\n"));
377   }
378 #endif
379   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
380   PetscFunctionReturn(PETSC_SUCCESS);
381 }
382 
383 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
384 {
385   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
386   Mat_SeqSBAIJ      *b;
387   PetscBool          perm_identity, missing;
388   PetscReal          fill = info->fill;
389   const PetscInt    *rip, *ai, *aj;
390   PetscInt           i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow, d;
391   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
392   PetscInt           nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr;
393   PetscFreeSpaceList free_space = NULL, current_space = NULL;
394   PetscBT            lnkbt;
395 
396   PetscFunctionBegin;
397   PetscCall(MatMissingDiagonal(A, &missing, &d));
398   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
399 
400   /*
401    This code originally uses Modified Sparse Row (MSR) storage
402    (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice!
403    Then it is rewritten so the factor B takes seqsbaij format. However the associated
404    MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity,
405    thus the original code in MSR format is still used for these cases.
406    The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever
407    MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
408   */
409   if (bs > 1) {
410     PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact, A, perm, info));
411     PetscFunctionReturn(PETSC_SUCCESS);
412   }
413 
414   /* check whether perm is the identity mapping */
415   PetscCall(ISIdentity(perm, &perm_identity));
416   PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
417   a->permute = PETSC_FALSE;
418   ai         = a->i;
419   aj         = a->j;
420   PetscCall(ISGetIndices(perm, &rip));
421 
422   /* initialization */
423   PetscCall(PetscMalloc1(mbs + 1, &ui));
424   ui[0] = 0;
425 
426   /* jl: linked list for storing indices of the pivot rows
427      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
428   PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
429   for (i = 0; i < mbs; i++) {
430     jl[i] = mbs;
431     il[i] = 0;
432   }
433 
434   /* create and initialize a linked list for storing column indices of the active row k */
435   nlnk = mbs + 1;
436   PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));
437 
438   /* initial FreeSpace size is fill*(ai[mbs]+1) */
439   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
440   current_space = free_space;
441 
442   for (k = 0; k < mbs; k++) { /* for each active row k */
443     /* initialize lnk by the column indices of row rip[k] of A */
444     nzk   = 0;
445     ncols = ai[rip[k] + 1] - ai[rip[k]];
446     for (j = 0; j < ncols; j++) {
447       i       = *(aj + ai[rip[k]] + j);
448       cols[j] = rip[i];
449     }
450     PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
451     nzk += nlnk;
452 
453     /* update lnk by computing fill-in for each pivot row to be merged in */
454     prow = jl[k]; /* 1st pivot row */
455 
456     while (prow < k) {
457       nextprow = jl[prow];
458       /* merge prow into k-th row */
459       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
460       jmax   = ui[prow + 1];
461       ncols  = jmax - jmin;
462       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
463       PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
464       nzk += nlnk;
465 
466       /* update il and jl for prow */
467       if (jmin < jmax) {
468         il[prow] = jmin;
469 
470         j        = *uj_ptr;
471         jl[prow] = jl[j];
472         jl[j]    = prow;
473       }
474       prow = nextprow;
475     }
476 
477     /* if free space is not available, make more free space */
478     if (current_space->local_remaining < nzk) {
479       i = mbs - k + 1;                                                            /* num of unfactored rows */
480       i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
481       PetscCall(PetscFreeSpaceGet(i, &current_space));
482       reallocs++;
483     }
484 
485     /* copy data into free space, then initialize lnk */
486     PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));
487 
488     /* add the k-th row into il and jl */
489     if (nzk - 1 > 0) {
490       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
491       jl[k] = jl[i];
492       jl[i] = k;
493       il[k] = ui[k] + 1;
494     }
495     ui_ptr[k] = current_space->array;
496 
497     current_space->array += nzk;
498     current_space->local_used += nzk;
499     current_space->local_remaining -= nzk;
500 
501     ui[k + 1] = ui[k] + nzk;
502   }
503 
504   PetscCall(ISRestoreIndices(perm, &rip));
505   PetscCall(PetscFree4(ui_ptr, il, jl, cols));
506 
507   /* destroy list of free space and other temporary array(s) */
508   PetscCall(PetscMalloc1(ui[mbs] + 1, &uj));
509   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
510   PetscCall(PetscLLDestroy(lnk, lnkbt));
511 
512   /* put together the new matrix in MATSEQSBAIJ format */
513   PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));
514 
515   b               = (Mat_SeqSBAIJ *)fact->data;
516   b->singlemalloc = PETSC_FALSE;
517   b->free_a       = PETSC_TRUE;
518   b->free_ij      = PETSC_TRUE;
519 
520   PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a));
521 
522   b->j    = uj;
523   b->i    = ui;
524   b->diag = NULL;
525   b->ilen = NULL;
526   b->imax = NULL;
527   b->row  = perm;
528 
529   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
530 
531   PetscCall(PetscObjectReference((PetscObject)perm));
532   b->icol = perm;
533   PetscCall(PetscObjectReference((PetscObject)perm));
534   PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
535   b->maxnz = b->nz = ui[mbs];
536 
537   fact->info.factor_mallocs   = reallocs;
538   fact->info.fill_ratio_given = fill;
539   if (ai[mbs] != 0) {
540     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
541   } else {
542     fact->info.fill_ratio_needed = 0.0;
543   }
544 #if defined(PETSC_USE_INFO)
545   if (ai[mbs] != 0) {
546     PetscReal af = fact->info.fill_ratio_needed;
547     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
548     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
549     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
550   } else {
551     PetscCall(PetscInfo(A, "Empty matrix\n"));
552   }
553 #endif
554   PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(fact, perm_identity));
555   PetscFunctionReturn(PETSC_SUCCESS);
556 }
557 
558 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C, Mat A, const MatFactorInfo *info)
559 {
560   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
561   IS              perm = b->row;
562   const PetscInt *ai, *aj, *perm_ptr, mbs = a->mbs, *bi = b->i, *bj = b->j;
563   PetscInt        i, j;
564   PetscInt       *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
565   PetscInt        bs = A->rmap->bs, bs2 = a->bs2;
566   MatScalar      *ba = b->a, *aa, *ap, *dk, *uik;
567   MatScalar      *u, *diag, *rtmp, *rtmp_ptr;
568   MatScalar      *work;
569   PetscInt       *pivots;
570   PetscBool       allowzeropivot, zeropivotdetected;
571 
572   PetscFunctionBegin;
573   /* initialization */
574   PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
575   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
576   allowzeropivot = PetscNot(A->erroriffailure);
577 
578   il[0] = 0;
579   for (i = 0; i < mbs; i++) jl[i] = mbs;
580 
581   PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
582   PetscCall(PetscMalloc1(bs, &pivots));
583 
584   PetscCall(ISGetIndices(perm, &perm_ptr));
585 
586   /* check permutation */
587   if (!a->permute) {
588     ai = a->i;
589     aj = a->j;
590     aa = a->a;
591   } else {
592     ai = a->inew;
593     aj = a->jnew;
594     PetscCall(PetscMalloc1(bs2 * ai[mbs], &aa));
595     PetscCall(PetscArraycpy(aa, a->a, bs2 * ai[mbs]));
596     PetscCall(PetscMalloc1(ai[mbs], &a2anew));
597     PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));
598 
599     for (i = 0; i < mbs; i++) {
600       jmin = ai[i];
601       jmax = ai[i + 1];
602       for (j = jmin; j < jmax; j++) {
603         while (a2anew[j] != j) {
604           k         = a2anew[j];
605           a2anew[j] = a2anew[k];
606           a2anew[k] = k;
607           for (k1 = 0; k1 < bs2; k1++) {
608             dk[k1]           = aa[k * bs2 + k1];
609             aa[k * bs2 + k1] = aa[j * bs2 + k1];
610             aa[j * bs2 + k1] = dk[k1];
611           }
612         }
613         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
614         if (i > aj[j]) {
615           ap = aa + j * bs2;                       /* ptr to the beginning of j-th block of aa */
616           for (k = 0; k < bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
617           for (k = 0; k < bs; k++) {               /* j-th block of aa <- dk^T */
618             for (k1 = 0; k1 < bs; k1++) *ap++ = dk[k + bs * k1];
619           }
620         }
621       }
622     }
623     PetscCall(PetscFree(a2anew));
624   }
625 
626   /* for each row k */
627   for (k = 0; k < mbs; k++) {
628     /*initialize k-th row with elements nonzero in row perm(k) of A */
629     jmin = ai[perm_ptr[k]];
630     jmax = ai[perm_ptr[k] + 1];
631 
632     ap = aa + jmin * bs2;
633     for (j = jmin; j < jmax; j++) {
634       vj       = perm_ptr[aj[j]]; /* block col. index */
635       rtmp_ptr = rtmp + vj * bs2;
636       for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
637     }
638 
639     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
640     PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
641     i = jl[k]; /* first row to be added to k_th row  */
642 
643     while (i < k) {
644       nexti = jl[i]; /* next row to be added to k_th row */
645 
646       /* compute multiplier */
647       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
648 
649       /* uik = -inv(Di)*U_bar(i,k) */
650       diag = ba + i * bs2;
651       u    = ba + ili * bs2;
652       PetscCall(PetscArrayzero(uik, bs2));
653       PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);
654 
655       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
656       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
657       PetscCall(PetscLogFlops(4.0 * bs * bs2));
658 
659       /* update -U(i,k) */
660       PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));
661 
662       /* add multiple of row i to k-th row ... */
663       jmin = ili + 1;
664       jmax = bi[i + 1];
665       if (jmin < jmax) {
666         for (j = jmin; j < jmax; j++) {
667           /* rtmp += -U(i,k)^T * U_bar(i,j) */
668           rtmp_ptr = rtmp + bj[j] * bs2;
669           u        = ba + j * bs2;
670           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
671         }
672         PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));
673 
674         /* ... add i to row list for next nonzero entry */
675         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
676         j     = bj[jmin];
677         jl[i] = jl[j];
678         jl[j] = i; /* update jl */
679       }
680       i = nexti;
681     }
682 
683     /* save nonzero entries in k-th row of U ... */
684 
685     /* invert diagonal block */
686     diag = ba + k * bs2;
687     PetscCall(PetscArraycpy(diag, dk, bs2));
688 
689     PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
690     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
691 
692     jmin = bi[k];
693     jmax = bi[k + 1];
694     if (jmin < jmax) {
695       for (j = jmin; j < jmax; j++) {
696         vj       = bj[j]; /* block col. index of U */
697         u        = ba + j * bs2;
698         rtmp_ptr = rtmp + vj * bs2;
699         for (k1 = 0; k1 < bs2; k1++) {
700           *u++        = *rtmp_ptr;
701           *rtmp_ptr++ = 0.0;
702         }
703       }
704 
705       /* ... add k to row list for first nonzero entry in k-th row */
706       il[k] = jmin;
707       i     = bj[jmin];
708       jl[k] = jl[i];
709       jl[i] = k;
710     }
711   }
712 
713   PetscCall(PetscFree(rtmp));
714   PetscCall(PetscFree2(il, jl));
715   PetscCall(PetscFree3(dk, uik, work));
716   PetscCall(PetscFree(pivots));
717   if (a->permute) PetscCall(PetscFree(aa));
718 
719   PetscCall(ISRestoreIndices(perm, &perm_ptr));
720 
721   C->ops->solve          = MatSolve_SeqSBAIJ_N_inplace;
722   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace;
723   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_inplace;
724   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_inplace;
725 
726   C->assembled    = PETSC_TRUE;
727   C->preallocated = PETSC_TRUE;
728 
729   PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
730   PetscFunctionReturn(PETSC_SUCCESS);
731 }
732 
733 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
734 {
735   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
736   PetscInt      i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
737   PetscInt     *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
738   PetscInt      bs = A->rmap->bs, bs2 = a->bs2;
739   MatScalar    *ba = b->a, *aa, *ap, *dk, *uik;
740   MatScalar    *u, *diag, *rtmp, *rtmp_ptr;
741   MatScalar    *work;
742   PetscInt     *pivots;
743   PetscBool     allowzeropivot, zeropivotdetected;
744 
745   PetscFunctionBegin;
746   PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
747   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
748   il[0] = 0;
749   for (i = 0; i < mbs; i++) jl[i] = mbs;
750 
751   PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
752   PetscCall(PetscMalloc1(bs, &pivots));
753   allowzeropivot = PetscNot(A->erroriffailure);
754 
755   ai = a->i;
756   aj = a->j;
757   aa = a->a;
758 
759   /* for each row k */
760   for (k = 0; k < mbs; k++) {
761     /*initialize k-th row with elements nonzero in row k of A */
762     jmin = ai[k];
763     jmax = ai[k + 1];
764     ap   = aa + jmin * bs2;
765     for (j = jmin; j < jmax; j++) {
766       vj       = aj[j]; /* block col. index */
767       rtmp_ptr = rtmp + vj * bs2;
768       for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
769     }
770 
771     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
772     PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
773     i = jl[k]; /* first row to be added to k_th row  */
774 
775     while (i < k) {
776       nexti = jl[i]; /* next row to be added to k_th row */
777 
778       /* compute multiplier */
779       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
780 
781       /* uik = -inv(Di)*U_bar(i,k) */
782       diag = ba + i * bs2;
783       u    = ba + ili * bs2;
784       PetscCall(PetscArrayzero(uik, bs2));
785       PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);
786 
787       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
788       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
789       PetscCall(PetscLogFlops(2.0 * bs * bs2));
790 
791       /* update -U(i,k) */
792       PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));
793 
794       /* add multiple of row i to k-th row ... */
795       jmin = ili + 1;
796       jmax = bi[i + 1];
797       if (jmin < jmax) {
798         for (j = jmin; j < jmax; j++) {
799           /* rtmp += -U(i,k)^T * U_bar(i,j) */
800           rtmp_ptr = rtmp + bj[j] * bs2;
801           u        = ba + j * bs2;
802           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
803         }
804         PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));
805 
806         /* ... add i to row list for next nonzero entry */
807         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
808         j     = bj[jmin];
809         jl[i] = jl[j];
810         jl[j] = i; /* update jl */
811       }
812       i = nexti;
813     }
814 
815     /* save nonzero entries in k-th row of U ... */
816 
817     /* invert diagonal block */
818     diag = ba + k * bs2;
819     PetscCall(PetscArraycpy(diag, dk, bs2));
820 
821     PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
822     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
823 
824     jmin = bi[k];
825     jmax = bi[k + 1];
826     if (jmin < jmax) {
827       for (j = jmin; j < jmax; j++) {
828         vj       = bj[j]; /* block col. index of U */
829         u        = ba + j * bs2;
830         rtmp_ptr = rtmp + vj * bs2;
831         for (k1 = 0; k1 < bs2; k1++) {
832           *u++        = *rtmp_ptr;
833           *rtmp_ptr++ = 0.0;
834         }
835       }
836 
837       /* ... add k to row list for first nonzero entry in k-th row */
838       il[k] = jmin;
839       i     = bj[jmin];
840       jl[k] = jl[i];
841       jl[i] = k;
842     }
843   }
844 
845   PetscCall(PetscFree(rtmp));
846   PetscCall(PetscFree2(il, jl));
847   PetscCall(PetscFree3(dk, uik, work));
848   PetscCall(PetscFree(pivots));
849 
850   C->ops->solve          = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
851   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
852   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
853   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
854   C->assembled           = PETSC_TRUE;
855   C->preallocated        = PETSC_TRUE;
856 
857   PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
858   PetscFunctionReturn(PETSC_SUCCESS);
859 }
860 
861 /*
862     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
863     Version for blocks 2 by 2.
864 */
865 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C, Mat A, const MatFactorInfo *info)
866 {
867   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
868   IS              perm = b->row;
869   const PetscInt *ai, *aj, *perm_ptr;
870   PetscInt        i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
871   PetscInt       *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
872   MatScalar      *ba = b->a, *aa, *ap;
873   MatScalar      *u, *diag, *rtmp, *rtmp_ptr, dk[4], uik[4];
874   PetscReal       shift = info->shiftamount;
875   PetscBool       allowzeropivot, zeropivotdetected;
876 
877   PetscFunctionBegin;
878   allowzeropivot = PetscNot(A->erroriffailure);
879 
880   /* initialization */
881   /* il and jl record the first nonzero element in each row of the accessing
882      window U(0:k, k:mbs-1).
883      jl:    list of rows to be added to uneliminated rows
884             i>= k: jl(i) is the first row to be added to row i
885             i<  k: jl(i) is the row following row i in some list of rows
886             jl(i) = mbs indicates the end of a list
887      il(i): points to the first nonzero element in columns k,...,mbs-1 of
888             row i of U */
889   PetscCall(PetscCalloc1(4 * mbs, &rtmp));
890   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
891   il[0] = 0;
892   for (i = 0; i < mbs; i++) jl[i] = mbs;
893 
894   PetscCall(ISGetIndices(perm, &perm_ptr));
895 
896   /* check permutation */
897   if (!a->permute) {
898     ai = a->i;
899     aj = a->j;
900     aa = a->a;
901   } else {
902     ai = a->inew;
903     aj = a->jnew;
904     PetscCall(PetscMalloc1(4 * ai[mbs], &aa));
905     PetscCall(PetscArraycpy(aa, a->a, 4 * ai[mbs]));
906     PetscCall(PetscMalloc1(ai[mbs], &a2anew));
907     PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));
908 
909     for (i = 0; i < mbs; i++) {
910       jmin = ai[i];
911       jmax = ai[i + 1];
912       for (j = jmin; j < jmax; j++) {
913         while (a2anew[j] != j) {
914           k         = a2anew[j];
915           a2anew[j] = a2anew[k];
916           a2anew[k] = k;
917           for (k1 = 0; k1 < 4; k1++) {
918             dk[k1]         = aa[k * 4 + k1];
919             aa[k * 4 + k1] = aa[j * 4 + k1];
920             aa[j * 4 + k1] = dk[k1];
921           }
922         }
923         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
924         if (i > aj[j]) {
925           ap    = aa + j * 4; /* ptr to the beginning of the block */
926           dk[1] = ap[1];      /* swap ap[1] and ap[2] */
927           ap[1] = ap[2];
928           ap[2] = dk[1];
929         }
930       }
931     }
932     PetscCall(PetscFree(a2anew));
933   }
934 
935   /* for each row k */
936   for (k = 0; k < mbs; k++) {
937     /*initialize k-th row with elements nonzero in row perm(k) of A */
938     jmin = ai[perm_ptr[k]];
939     jmax = ai[perm_ptr[k] + 1];
940     ap   = aa + jmin * 4;
941     for (j = jmin; j < jmax; j++) {
942       vj       = perm_ptr[aj[j]]; /* block col. index */
943       rtmp_ptr = rtmp + vj * 4;
944       for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
945     }
946 
947     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
948     PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
949     i = jl[k]; /* first row to be added to k_th row  */
950 
951     while (i < k) {
952       nexti = jl[i]; /* next row to be added to k_th row */
953 
954       /* compute multiplier */
955       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
956 
957       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
958       diag   = ba + i * 4;
959       u      = ba + ili * 4;
960       uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
961       uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
962       uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
963       uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);
964 
965       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
966       dk[0] += uik[0] * u[0] + uik[1] * u[1];
967       dk[1] += uik[2] * u[0] + uik[3] * u[1];
968       dk[2] += uik[0] * u[2] + uik[1] * u[3];
969       dk[3] += uik[2] * u[2] + uik[3] * u[3];
970 
971       PetscCall(PetscLogFlops(16.0 * 2.0));
972 
973       /* update -U(i,k): ba[ili] = uik */
974       PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));
975 
976       /* add multiple of row i to k-th row ... */
977       jmin = ili + 1;
978       jmax = bi[i + 1];
979       if (jmin < jmax) {
980         for (j = jmin; j < jmax; j++) {
981           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
982           rtmp_ptr = rtmp + bj[j] * 4;
983           u        = ba + j * 4;
984           rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
985           rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
986           rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
987           rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
988         }
989         PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));
990 
991         /* ... add i to row list for next nonzero entry */
992         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
993         j     = bj[jmin];
994         jl[i] = jl[j];
995         jl[j] = i; /* update jl */
996       }
997       i = nexti;
998     }
999 
1000     /* save nonzero entries in k-th row of U ... */
1001 
1002     /* invert diagonal block */
1003     diag = ba + k * 4;
1004     PetscCall(PetscArraycpy(diag, dk, 4));
1005     PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1006     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1007 
1008     jmin = bi[k];
1009     jmax = bi[k + 1];
1010     if (jmin < jmax) {
1011       for (j = jmin; j < jmax; j++) {
1012         vj       = bj[j]; /* block col. index of U */
1013         u        = ba + j * 4;
1014         rtmp_ptr = rtmp + vj * 4;
1015         for (k1 = 0; k1 < 4; k1++) {
1016           *u++        = *rtmp_ptr;
1017           *rtmp_ptr++ = 0.0;
1018         }
1019       }
1020 
1021       /* ... add k to row list for first nonzero entry in k-th row */
1022       il[k] = jmin;
1023       i     = bj[jmin];
1024       jl[k] = jl[i];
1025       jl[i] = k;
1026     }
1027   }
1028 
1029   PetscCall(PetscFree(rtmp));
1030   PetscCall(PetscFree2(il, jl));
1031   if (a->permute) PetscCall(PetscFree(aa));
1032   PetscCall(ISRestoreIndices(perm, &perm_ptr));
1033 
1034   C->ops->solve          = MatSolve_SeqSBAIJ_2_inplace;
1035   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace;
1036   C->assembled           = PETSC_TRUE;
1037   C->preallocated        = PETSC_TRUE;
1038 
1039   PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1040   PetscFunctionReturn(PETSC_SUCCESS);
1041 }
1042 
1043 /*
1044       Version for when blocks are 2 by 2 Using natural ordering
1045 */
1046 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
1047 {
1048   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1049   PetscInt      i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
1050   PetscInt     *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
1051   MatScalar    *ba = b->a, *aa, *ap, dk[8], uik[8];
1052   MatScalar    *u, *diag, *rtmp, *rtmp_ptr;
1053   PetscReal     shift = info->shiftamount;
1054   PetscBool     allowzeropivot, zeropivotdetected;
1055 
1056   PetscFunctionBegin;
1057   allowzeropivot = PetscNot(A->erroriffailure);
1058 
1059   /* initialization */
1060   /* il and jl record the first nonzero element in each row of the accessing
1061      window U(0:k, k:mbs-1).
1062      jl:    list of rows to be added to uneliminated rows
1063             i>= k: jl(i) is the first row to be added to row i
1064             i<  k: jl(i) is the row following row i in some list of rows
1065             jl(i) = mbs indicates the end of a list
1066      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1067             row i of U */
1068   PetscCall(PetscCalloc1(4 * mbs, &rtmp));
1069   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
1070   il[0] = 0;
1071   for (i = 0; i < mbs; i++) jl[i] = mbs;
1072 
1073   ai = a->i;
1074   aj = a->j;
1075   aa = a->a;
1076 
1077   /* for each row k */
1078   for (k = 0; k < mbs; k++) {
1079     /*initialize k-th row with elements nonzero in row k of A */
1080     jmin = ai[k];
1081     jmax = ai[k + 1];
1082     ap   = aa + jmin * 4;
1083     for (j = jmin; j < jmax; j++) {
1084       vj       = aj[j]; /* block col. index */
1085       rtmp_ptr = rtmp + vj * 4;
1086       for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
1087     }
1088 
1089     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1090     PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
1091     i = jl[k]; /* first row to be added to k_th row  */
1092 
1093     while (i < k) {
1094       nexti = jl[i]; /* next row to be added to k_th row */
1095 
1096       /* compute multiplier */
1097       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1098 
1099       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1100       diag   = ba + i * 4;
1101       u      = ba + ili * 4;
1102       uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
1103       uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
1104       uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
1105       uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);
1106 
1107       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1108       dk[0] += uik[0] * u[0] + uik[1] * u[1];
1109       dk[1] += uik[2] * u[0] + uik[3] * u[1];
1110       dk[2] += uik[0] * u[2] + uik[1] * u[3];
1111       dk[3] += uik[2] * u[2] + uik[3] * u[3];
1112 
1113       PetscCall(PetscLogFlops(16.0 * 2.0));
1114 
1115       /* update -U(i,k): ba[ili] = uik */
1116       PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));
1117 
1118       /* add multiple of row i to k-th row ... */
1119       jmin = ili + 1;
1120       jmax = bi[i + 1];
1121       if (jmin < jmax) {
1122         for (j = jmin; j < jmax; j++) {
1123           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1124           rtmp_ptr = rtmp + bj[j] * 4;
1125           u        = ba + j * 4;
1126           rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
1127           rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
1128           rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
1129           rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
1130         }
1131         PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));
1132 
1133         /* ... add i to row list for next nonzero entry */
1134         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1135         j     = bj[jmin];
1136         jl[i] = jl[j];
1137         jl[j] = i; /* update jl */
1138       }
1139       i = nexti;
1140     }
1141 
1142     /* save nonzero entries in k-th row of U ... */
1143 
1144     /* invert diagonal block */
1145     diag = ba + k * 4;
1146     PetscCall(PetscArraycpy(diag, dk, 4));
1147     PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1148     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1149 
1150     jmin = bi[k];
1151     jmax = bi[k + 1];
1152     if (jmin < jmax) {
1153       for (j = jmin; j < jmax; j++) {
1154         vj       = bj[j]; /* block col. index of U */
1155         u        = ba + j * 4;
1156         rtmp_ptr = rtmp + vj * 4;
1157         for (k1 = 0; k1 < 4; k1++) {
1158           *u++        = *rtmp_ptr;
1159           *rtmp_ptr++ = 0.0;
1160         }
1161       }
1162 
1163       /* ... add k to row list for first nonzero entry in k-th row */
1164       il[k] = jmin;
1165       i     = bj[jmin];
1166       jl[k] = jl[i];
1167       jl[i] = k;
1168     }
1169   }
1170 
1171   PetscCall(PetscFree(rtmp));
1172   PetscCall(PetscFree2(il, jl));
1173 
1174   C->ops->solve          = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1175   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1176   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1177   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1178   C->assembled           = PETSC_TRUE;
1179   C->preallocated        = PETSC_TRUE;
1180 
1181   PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1182   PetscFunctionReturn(PETSC_SUCCESS);
1183 }
1184 
1185 /*
1186     Numeric U^T*D*U factorization for SBAIJ format.
1187     Version for blocks are 1 by 1.
1188 */
1189 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info)
1190 {
1191   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1192   IS              ip = b->row;
1193   const PetscInt *ai, *aj, *rip;
1194   PetscInt       *a2anew, i, j, mbs = a->mbs, *bi = b->i, *bj = b->j, *bcol;
1195   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
1196   MatScalar      *rtmp, *ba = b->a, *bval, *aa, dk, uikdi;
1197   PetscReal       rs;
1198   FactorShiftCtx  sctx;
1199 
1200   PetscFunctionBegin;
1201   /* MatPivotSetUp(): initialize shift context sctx */
1202   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1203 
1204   PetscCall(ISGetIndices(ip, &rip));
1205   if (!a->permute) {
1206     ai = a->i;
1207     aj = a->j;
1208     aa = a->a;
1209   } else {
1210     ai = a->inew;
1211     aj = a->jnew;
1212     nz = ai[mbs];
1213     PetscCall(PetscMalloc1(nz, &aa));
1214     a2anew = a->a2anew;
1215     bval   = a->a;
1216     for (j = 0; j < nz; j++) aa[a2anew[j]] = *(bval++);
1217   }
1218 
1219   /* initialization */
1220   /* il and jl record the first nonzero element in each row of the accessing
1221      window U(0:k, k:mbs-1).
1222      jl:    list of rows to be added to uneliminated rows
1223             i>= k: jl(i) is the first row to be added to row i
1224             i<  k: jl(i) is the row following row i in some list of rows
1225             jl(i) = mbs indicates the end of a list
1226      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1227             row i of U */
1228   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
1229 
1230   do {
1231     sctx.newshift = PETSC_FALSE;
1232     il[0]         = 0;
1233     for (i = 0; i < mbs; i++) {
1234       rtmp[i] = 0.0;
1235       jl[i]   = mbs;
1236     }
1237 
1238     for (k = 0; k < mbs; k++) {
1239       /*initialize k-th row by the perm[k]-th row of A */
1240       jmin = ai[rip[k]];
1241       jmax = ai[rip[k] + 1];
1242       bval = ba + bi[k];
1243       for (j = jmin; j < jmax; j++) {
1244         col       = rip[aj[j]];
1245         rtmp[col] = aa[j];
1246         *bval++   = 0.0; /* for in-place factorization */
1247       }
1248 
1249       /* shift the diagonal of the matrix */
1250       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1251 
1252       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1253       dk = rtmp[k];
1254       i  = jl[k]; /* first row to be added to k_th row  */
1255 
1256       while (i < k) {
1257         nexti = jl[i]; /* next row to be added to k_th row */
1258 
1259         /* compute multiplier, update diag(k) and U(i,k) */
1260         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
1261         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
1262         dk += uikdi * ba[ili];
1263         ba[ili] = uikdi; /* -U(i,k) */
1264 
1265         /* add multiple of row i to k-th row */
1266         jmin = ili + 1;
1267         jmax = bi[i + 1];
1268         if (jmin < jmax) {
1269           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1270           PetscCall(PetscLogFlops(2.0 * (jmax - jmin)));
1271 
1272           /* update il and jl for row i */
1273           il[i] = jmin;
1274           j     = bj[jmin];
1275           jl[i] = jl[j];
1276           jl[j] = i;
1277         }
1278         i = nexti;
1279       }
1280 
1281       /* shift the diagonals when zero pivot is detected */
1282       /* compute rs=sum of abs(off-diagonal) */
1283       rs   = 0.0;
1284       jmin = bi[k] + 1;
1285       nz   = bi[k + 1] - jmin;
1286       if (nz) {
1287         bcol = bj + jmin;
1288         while (nz--) {
1289           rs += PetscAbsScalar(rtmp[*bcol]);
1290           bcol++;
1291         }
1292       }
1293 
1294       sctx.rs = rs;
1295       sctx.pv = dk;
1296       PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1297       if (sctx.newshift) break; /* sctx.shift_amount is updated */
1298       dk = sctx.pv;
1299 
1300       /* copy data into U(k,:) */
1301       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
1302       jmin      = bi[k] + 1;
1303       jmax      = bi[k + 1];
1304       if (jmin < jmax) {
1305         for (j = jmin; j < jmax; j++) {
1306           col       = bj[j];
1307           ba[j]     = rtmp[col];
1308           rtmp[col] = 0.0;
1309         }
1310         /* add the k-th row into il and jl */
1311         il[k] = jmin;
1312         i     = bj[jmin];
1313         jl[k] = jl[i];
1314         jl[i] = k;
1315       }
1316     }
1317   } while (sctx.newshift);
1318   PetscCall(PetscFree3(rtmp, il, jl));
1319   if (a->permute) PetscCall(PetscFree(aa));
1320 
1321   PetscCall(ISRestoreIndices(ip, &rip));
1322 
1323   C->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
1324   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1325   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
1326   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
1327   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
1328   C->assembled           = PETSC_TRUE;
1329   C->preallocated        = PETSC_TRUE;
1330 
1331   PetscCall(PetscLogFlops(C->rmap->N));
1332   if (sctx.nshift) {
1333     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1334       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1335     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1336       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1337     }
1338   }
1339   PetscFunctionReturn(PETSC_SUCCESS);
1340 }
1341 
1342 /*
1343   Version for when blocks are 1 by 1 Using natural ordering under new datastructure
1344   Modified from MatCholeskyFactorNumeric_SeqAIJ()
1345 */
1346 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info)
1347 {
1348   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ *)A->data;
1349   Mat_SeqSBAIJ  *b = (Mat_SeqSBAIJ *)B->data;
1350   PetscInt       i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1351   PetscInt      *ai = a->i, *aj = a->j, *ajtmp;
1352   PetscInt       k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1353   MatScalar     *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
1354   FactorShiftCtx sctx;
1355   PetscReal      rs;
1356   MatScalar      d, *v;
1357 
1358   PetscFunctionBegin;
1359   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
1360 
1361   /* MatPivotSetUp(): initialize shift context sctx */
1362   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1363 
1364   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1365     sctx.shift_top = info->zeropivot;
1366 
1367     PetscCall(PetscArrayzero(rtmp, mbs));
1368 
1369     for (i = 0; i < mbs; i++) {
1370       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1371       d = (aa)[a->diag[i]];
1372       rtmp[i] += -PetscRealPart(d); /* diagonal entry */
1373       ajtmp = aj + ai[i] + 1;       /* exclude diagonal */
1374       v     = aa + ai[i] + 1;
1375       nz    = ai[i + 1] - ai[i] - 1;
1376       for (j = 0; j < nz; j++) {
1377         rtmp[i] += PetscAbsScalar(v[j]);
1378         rtmp[ajtmp[j]] += PetscAbsScalar(v[j]);
1379       }
1380       if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]);
1381     }
1382     sctx.shift_top *= 1.1;
1383     sctx.nshift_max = 5;
1384     sctx.shift_lo   = 0.;
1385     sctx.shift_hi   = 1.;
1386   }
1387 
1388   /* allocate working arrays
1389      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1390      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1391   */
1392   do {
1393     sctx.newshift = PETSC_FALSE;
1394 
1395     for (i = 0; i < mbs; i++) c2r[i] = mbs;
1396     if (mbs) il[0] = 0;
1397 
1398     for (k = 0; k < mbs; k++) {
1399       /* zero rtmp */
1400       nz    = bi[k + 1] - bi[k];
1401       bjtmp = bj + bi[k];
1402       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
1403 
1404       /* load in initial unfactored row */
1405       bval = ba + bi[k];
1406       jmin = ai[k];
1407       jmax = ai[k + 1];
1408       for (j = jmin; j < jmax; j++) {
1409         col       = aj[j];
1410         rtmp[col] = aa[j];
1411         *bval++   = 0.0; /* for in-place factorization */
1412       }
1413       /* shift the diagonal of the matrix: ZeropivotApply() */
1414       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
1415 
1416       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1417       dk = rtmp[k];
1418       i  = c2r[k]; /* first row to be added to k_th row  */
1419 
1420       while (i < k) {
1421         nexti = c2r[i]; /* next row to be added to k_th row */
1422 
1423         /* compute multiplier, update diag(k) and U(i,k) */
1424         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
1425         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
1426         dk += uikdi * ba[ili];           /* update diag[k] */
1427         ba[ili] = uikdi;                 /* -U(i,k) */
1428 
1429         /* add multiple of row i to k-th row */
1430         jmin = ili + 1;
1431         jmax = bi[i + 1];
1432         if (jmin < jmax) {
1433           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1434           /* update il and c2r for row i */
1435           il[i]  = jmin;
1436           j      = bj[jmin];
1437           c2r[i] = c2r[j];
1438           c2r[j] = i;
1439         }
1440         i = nexti;
1441       }
1442 
1443       /* copy data into U(k,:) */
1444       rs   = 0.0;
1445       jmin = bi[k];
1446       jmax = bi[k + 1] - 1;
1447       if (jmin < jmax) {
1448         for (j = jmin; j < jmax; j++) {
1449           col   = bj[j];
1450           ba[j] = rtmp[col];
1451           rs += PetscAbsScalar(ba[j]);
1452         }
1453         /* add the k-th row into il and c2r */
1454         il[k]  = jmin;
1455         i      = bj[jmin];
1456         c2r[k] = c2r[i];
1457         c2r[i] = k;
1458       }
1459 
1460       sctx.rs = rs;
1461       sctx.pv = dk;
1462       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
1463       if (sctx.newshift) break;
1464       dk = sctx.pv;
1465 
1466       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
1467     }
1468   } while (sctx.newshift);
1469 
1470   PetscCall(PetscFree3(rtmp, il, c2r));
1471 
1472   B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1473   B->ops->solves         = MatSolves_SeqSBAIJ_1;
1474   B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1475   B->ops->matsolve       = MatMatSolve_SeqSBAIJ_1_NaturalOrdering;
1476   B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1477   B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1478 
1479   B->assembled    = PETSC_TRUE;
1480   B->preallocated = PETSC_TRUE;
1481 
1482   PetscCall(PetscLogFlops(B->rmap->n));
1483 
1484   /* MatPivotView() */
1485   if (sctx.nshift) {
1486     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1487       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
1488     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1489       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1490     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1491       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
1492     }
1493   }
1494   PetscFunctionReturn(PETSC_SUCCESS);
1495 }
1496 
1497 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info)
1498 {
1499   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1500   PetscInt       i, j, mbs = a->mbs;
1501   PetscInt      *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
1502   PetscInt       k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz;
1503   MatScalar     *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval;
1504   PetscReal      rs;
1505   FactorShiftCtx sctx;
1506 
1507   PetscFunctionBegin;
1508   /* MatPivotSetUp(): initialize shift context sctx */
1509   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1510 
1511   /* initialization */
1512   /* il and jl record the first nonzero element in each row of the accessing
1513      window U(0:k, k:mbs-1).
1514      jl:    list of rows to be added to uneliminated rows
1515             i>= k: jl(i) is the first row to be added to row i
1516             i<  k: jl(i) is the row following row i in some list of rows
1517             jl(i) = mbs indicates the end of a list
1518      il(i): points to the first nonzero element in U(i,k:mbs-1)
1519   */
1520   PetscCall(PetscMalloc1(mbs, &rtmp));
1521   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
1522 
1523   do {
1524     sctx.newshift = PETSC_FALSE;
1525     il[0]         = 0;
1526     for (i = 0; i < mbs; i++) {
1527       rtmp[i] = 0.0;
1528       jl[i]   = mbs;
1529     }
1530 
1531     for (k = 0; k < mbs; k++) {
1532       /*initialize k-th row with elements nonzero in row perm(k) of A */
1533       nz   = ai[k + 1] - ai[k];
1534       acol = aj + ai[k];
1535       aval = aa + ai[k];
1536       bval = ba + bi[k];
1537       while (nz--) {
1538         rtmp[*acol++] = *aval++;
1539         *bval++       = 0.0; /* for in-place factorization */
1540       }
1541 
1542       /* shift the diagonal of the matrix */
1543       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1544 
1545       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1546       dk = rtmp[k];
1547       i  = jl[k]; /* first row to be added to k_th row  */
1548 
1549       while (i < k) {
1550         nexti = jl[i]; /* next row to be added to k_th row */
1551         /* compute multiplier, update D(k) and U(i,k) */
1552         ili   = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1553         uikdi = -ba[ili] * ba[bi[i]];
1554         dk += uikdi * ba[ili];
1555         ba[ili] = uikdi; /* -U(i,k) */
1556 
1557         /* add multiple of row i to k-th row ... */
1558         jmin = ili + 1;
1559         nz   = bi[i + 1] - jmin;
1560         if (nz > 0) {
1561           bcol = bj + jmin;
1562           bval = ba + jmin;
1563           PetscCall(PetscLogFlops(2.0 * nz));
1564           while (nz--) rtmp[*bcol++] += uikdi * (*bval++);
1565 
1566           /* update il and jl for i-th row */
1567           il[i] = jmin;
1568           j     = bj[jmin];
1569           jl[i] = jl[j];
1570           jl[j] = i;
1571         }
1572         i = nexti;
1573       }
1574 
1575       /* shift the diagonals when zero pivot is detected */
1576       /* compute rs=sum of abs(off-diagonal) */
1577       rs   = 0.0;
1578       jmin = bi[k] + 1;
1579       nz   = bi[k + 1] - jmin;
1580       if (nz) {
1581         bcol = bj + jmin;
1582         while (nz--) {
1583           rs += PetscAbsScalar(rtmp[*bcol]);
1584           bcol++;
1585         }
1586       }
1587 
1588       sctx.rs = rs;
1589       sctx.pv = dk;
1590       PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1591       if (sctx.newshift) break; /* sctx.shift_amount is updated */
1592       dk = sctx.pv;
1593 
1594       /* copy data into U(k,:) */
1595       ba[bi[k]] = 1.0 / dk;
1596       jmin      = bi[k] + 1;
1597       nz        = bi[k + 1] - jmin;
1598       if (nz) {
1599         bcol = bj + jmin;
1600         bval = ba + jmin;
1601         while (nz--) {
1602           *bval++       = rtmp[*bcol];
1603           rtmp[*bcol++] = 0.0;
1604         }
1605         /* add k-th row into il and jl */
1606         il[k] = jmin;
1607         i     = bj[jmin];
1608         jl[k] = jl[i];
1609         jl[i] = k;
1610       }
1611     } /* end of for (k = 0; k<mbs; k++) */
1612   } while (sctx.newshift);
1613   PetscCall(PetscFree(rtmp));
1614   PetscCall(PetscFree2(il, jl));
1615 
1616   C->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1617   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1618   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1619   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1620   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1621 
1622   C->assembled    = PETSC_TRUE;
1623   C->preallocated = PETSC_TRUE;
1624 
1625   PetscCall(PetscLogFlops(C->rmap->N));
1626   if (sctx.nshift) {
1627     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1628       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1629     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1630       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1631     }
1632   }
1633   PetscFunctionReturn(PETSC_SUCCESS);
1634 }
1635 
1636 PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A, IS perm, const MatFactorInfo *info)
1637 {
1638   Mat C;
1639 
1640   PetscFunctionBegin;
1641   PetscCall(MatGetFactor(A, "petsc", MAT_FACTOR_CHOLESKY, &C));
1642   PetscCall(MatCholeskyFactorSymbolic(C, A, perm, info));
1643   PetscCall(MatCholeskyFactorNumeric(C, A, info));
1644 
1645   A->ops->solve          = C->ops->solve;
1646   A->ops->solvetranspose = C->ops->solvetranspose;
1647 
1648   PetscCall(MatHeaderMerge(A, &C));
1649   PetscFunctionReturn(PETSC_SUCCESS);
1650 }
1651