xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact.c (revision e0f5bfbec699682fa3e8b8532b1176849ea4e12a)
1 
2 #include <../src/mat/impls/baij/seq/baij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petsc/private/kernels/blockinvert.h>
5 #include <petscis.h>
6 
7 PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) {
8   Mat_SeqSBAIJ *fact = (Mat_SeqSBAIJ *)F->data;
9   MatScalar    *dd   = fact->a;
10   PetscInt      mbs = fact->mbs, bs = F->rmap->bs, i, nneg_tmp, npos_tmp, *fi = fact->diag;
11 
12   PetscFunctionBegin;
13   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for bs: %" PetscInt_FMT " >1 yet", bs);
14 
15   nneg_tmp = 0;
16   npos_tmp = 0;
17   for (i = 0; i < mbs; i++) {
18     if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++;
19     else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++;
20     fi++;
21   }
22   if (nneg) *nneg = nneg_tmp;
23   if (npos) *npos = npos_tmp;
24   if (nzero) *nzero = mbs - nneg_tmp - npos_tmp;
25   PetscFunctionReturn(0);
26 }
27 
28 /*
29   Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
30   Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad.
31 */
32 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F, Mat A, IS perm, const MatFactorInfo *info) {
33   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b;
34   const PetscInt *rip, *ai, *aj;
35   PetscInt        i, mbs = a->mbs, *jutmp, bs = A->rmap->bs, bs2 = a->bs2;
36   PetscInt        m, reallocs = 0, prow;
37   PetscInt       *jl, *q, jmin, jmax, juidx, nzk, qm, *iu, *ju, k, j, vj, umax, maxadd;
38   PetscReal       f = info->fill;
39   PetscBool       perm_identity;
40 
41   PetscFunctionBegin;
42   /* check whether perm is the identity mapping */
43   PetscCall(ISIdentity(perm, &perm_identity));
44   PetscCall(ISGetIndices(perm, &rip));
45 
46   if (perm_identity) { /* without permutation */
47     a->permute = PETSC_FALSE;
48 
49     ai = a->i;
50     aj = a->j;
51   } else { /* non-trivial permutation */
52     a->permute = PETSC_TRUE;
53 
54     PetscCall(MatReorderingSeqSBAIJ(A, perm));
55 
56     ai = a->inew;
57     aj = a->jnew;
58   }
59 
60   /* initialization */
61   PetscCall(PetscMalloc1(mbs + 1, &iu));
62   umax = (PetscInt)(f * ai[mbs] + 1);
63   umax += mbs + 1;
64   PetscCall(PetscMalloc1(umax, &ju));
65   iu[0] = mbs + 1;
66   juidx = mbs + 1; /* index for ju */
67   /* jl linked list for pivot row -- linked list for col index */
68   PetscCall(PetscMalloc2(mbs, &jl, mbs, &q));
69   for (i = 0; i < mbs; i++) {
70     jl[i] = mbs;
71     q[i]  = 0;
72   }
73 
74   /* for each row k */
75   for (k = 0; k < mbs; k++) {
76     for (i = 0; i < mbs; i++) q[i] = 0; /* to be removed! */
77     nzk  = 0;                           /* num. of nz blocks in k-th block row with diagonal block excluded */
78     q[k] = mbs;
79     /* initialize nonzero structure of k-th row to row rip[k] of A */
80     jmin = ai[rip[k]] + 1; /* exclude diag[k] */
81     jmax = ai[rip[k] + 1];
82     for (j = jmin; j < jmax; j++) {
83       vj = rip[aj[j]]; /* col. value */
84       if (vj > k) {
85         qm = k;
86         do {
87           m  = qm;
88           qm = q[m];
89         } while (qm < vj);
90         PetscCheck(qm != vj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Duplicate entry in A");
91         nzk++;
92         q[m]  = vj;
93         q[vj] = qm;
94       } /* if (vj > k) */
95     }   /* for (j=jmin; j<jmax; j++) */
96 
97     /* modify nonzero structure of k-th row by computing fill-in
98        for each row i to be merged in */
99     prow = k;
100     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
101 
102     while (prow < k) {
103       /* merge row prow into k-th row */
104       jmin = iu[prow] + 1;
105       jmax = iu[prow + 1];
106       qm   = k;
107       for (j = jmin; j < jmax; j++) {
108         vj = ju[j];
109         do {
110           m  = qm;
111           qm = q[m];
112         } while (qm < vj);
113         if (qm != vj) {
114           nzk++;
115           q[m]  = vj;
116           q[vj] = qm;
117           qm    = vj;
118         }
119       }
120       prow = jl[prow]; /* next pivot row */
121     }
122 
123     /* add k to row list for first nonzero element in k-th row */
124     if (nzk > 0) {
125       i     = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
126       jl[k] = jl[i];
127       jl[i] = k;
128     }
129     iu[k + 1] = iu[k] + nzk;
130 
131     /* allocate more space to ju if needed */
132     if (iu[k + 1] > umax) {
133       /* estimate how much additional space we will need */
134       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
135       /* just double the memory each time */
136       maxadd = umax;
137       if (maxadd < nzk) maxadd = (mbs - k) * (nzk + 1) / 2;
138       umax += maxadd;
139 
140       /* allocate a longer ju */
141       PetscCall(PetscMalloc1(umax, &jutmp));
142       PetscCall(PetscArraycpy(jutmp, ju, iu[k]));
143       PetscCall(PetscFree(ju));
144       ju = jutmp;
145       reallocs++; /* count how many times we realloc */
146     }
147 
148     /* save nonzero structure of k-th row in ju */
149     i = k;
150     while (nzk--) {
151       i           = q[i];
152       ju[juidx++] = i;
153     }
154   }
155 
156 #if defined(PETSC_USE_INFO)
157   if (ai[mbs] != 0) {
158     PetscReal af = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
159     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
160     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
161     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
162     PetscCall(PetscInfo(A, "for best performance.\n"));
163   } else {
164     PetscCall(PetscInfo(A, "Empty matrix\n"));
165   }
166 #endif
167 
168   PetscCall(ISRestoreIndices(perm, &rip));
169   PetscCall(PetscFree2(jl, q));
170 
171   /* put together the new matrix */
172   PetscCall(MatSeqSBAIJSetPreallocation(F, bs, MAT_SKIP_ALLOCATION, NULL));
173 
174   b               = (Mat_SeqSBAIJ *)(F)->data;
175   b->singlemalloc = PETSC_FALSE;
176   b->free_a       = PETSC_TRUE;
177   b->free_ij      = PETSC_TRUE;
178 
179   PetscCall(PetscMalloc1((iu[mbs] + 1) * bs2, &b->a));
180   b->j    = ju;
181   b->i    = iu;
182   b->diag = NULL;
183   b->ilen = NULL;
184   b->imax = NULL;
185   b->row  = perm;
186 
187   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
188 
189   PetscCall(PetscObjectReference((PetscObject)perm));
190 
191   b->icol = perm;
192   PetscCall(PetscObjectReference((PetscObject)perm));
193   PetscCall(PetscMalloc1(bs * mbs + bs, &b->solve_work));
194   /* In b structure:  Free imax, ilen, old a, old j.
195      Allocate idnew, solve_work, new a, new j */
196   b->maxnz = b->nz = iu[mbs];
197 
198   (F)->info.factor_mallocs   = reallocs;
199   (F)->info.fill_ratio_given = f;
200   if (ai[mbs] != 0) {
201     (F)->info.fill_ratio_needed = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
202   } else {
203     (F)->info.fill_ratio_needed = 0.0;
204   }
205   PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(F, perm_identity));
206   PetscFunctionReturn(0);
207 }
208 /*
209     Symbolic U^T*D*U factorization for SBAIJ format.
210     See MatICCFactorSymbolic_SeqAIJ() for description of its data structure.
211 */
212 #include <petscbt.h>
213 #include <../src/mat/utils/freespace.h>
214 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) {
215   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
216   Mat_SeqSBAIJ      *b;
217   PetscBool          perm_identity, missing;
218   PetscReal          fill = info->fill;
219   const PetscInt    *rip, *ai = a->i, *aj = a->j;
220   PetscInt           i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow;
221   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
222   PetscInt           nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
223   PetscFreeSpaceList free_space = NULL, current_space = NULL;
224   PetscBT            lnkbt;
225 
226   PetscFunctionBegin;
227   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
228   PetscCall(MatMissingDiagonal(A, &missing, &i));
229   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
230   if (bs > 1) {
231     PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact, A, perm, info));
232     PetscFunctionReturn(0);
233   }
234 
235   /* check whether perm is the identity mapping */
236   PetscCall(ISIdentity(perm, &perm_identity));
237   PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
238   a->permute = PETSC_FALSE;
239   PetscCall(ISGetIndices(perm, &rip));
240 
241   /* initialization */
242   PetscCall(PetscMalloc1(mbs + 1, &ui));
243   PetscCall(PetscMalloc1(mbs + 1, &udiag));
244   ui[0] = 0;
245 
246   /* jl: linked list for storing indices of the pivot rows
247      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
248   PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
249   for (i = 0; i < mbs; i++) {
250     jl[i] = mbs;
251     il[i] = 0;
252   }
253 
254   /* create and initialize a linked list for storing column indices of the active row k */
255   nlnk = mbs + 1;
256   PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));
257 
258   /* initial FreeSpace size is fill*(ai[mbs]+1) */
259   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
260   current_space = free_space;
261 
262   for (k = 0; k < mbs; k++) { /* for each active row k */
263     /* initialize lnk by the column indices of row rip[k] of A */
264     nzk   = 0;
265     ncols = ai[k + 1] - ai[k];
266     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row %" PetscInt_FMT " in matrix ", k);
267     for (j = 0; j < ncols; j++) {
268       i       = *(aj + ai[k] + j);
269       cols[j] = i;
270     }
271     PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
272     nzk += nlnk;
273 
274     /* update lnk by computing fill-in for each pivot row to be merged in */
275     prow = jl[k]; /* 1st pivot row */
276 
277     while (prow < k) {
278       nextprow = jl[prow];
279       /* merge prow into k-th row */
280       jmin     = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
281       jmax     = ui[prow + 1];
282       ncols    = jmax - jmin;
283       uj_ptr   = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
284       PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
285       nzk += nlnk;
286 
287       /* update il and jl for prow */
288       if (jmin < jmax) {
289         il[prow] = jmin;
290         j        = *uj_ptr;
291         jl[prow] = jl[j];
292         jl[j]    = prow;
293       }
294       prow = nextprow;
295     }
296 
297     /* if free space is not available, make more free space */
298     if (current_space->local_remaining < nzk) {
299       i = mbs - k + 1;                                   /* num of unfactored rows */
300       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
301       PetscCall(PetscFreeSpaceGet(i, &current_space));
302       reallocs++;
303     }
304 
305     /* copy data into free space, then initialize lnk */
306     PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));
307 
308     /* add the k-th row into il and jl */
309     if (nzk > 1) {
310       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
311       jl[k] = jl[i];
312       jl[i] = k;
313       il[k] = ui[k] + 1;
314     }
315     ui_ptr[k] = current_space->array;
316 
317     current_space->array += nzk;
318     current_space->local_used += nzk;
319     current_space->local_remaining -= nzk;
320 
321     ui[k + 1] = ui[k] + nzk;
322   }
323 
324   PetscCall(ISRestoreIndices(perm, &rip));
325   PetscCall(PetscFree4(ui_ptr, il, jl, cols));
326 
327   /* destroy list of free space and other temporary array(s) */
328   PetscCall(PetscMalloc1(ui[mbs] + 1, &uj));
329   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, mbs, ui, udiag)); /* store matrix factor */
330   PetscCall(PetscLLDestroy(lnk, lnkbt));
331 
332   /* put together the new matrix in MATSEQSBAIJ format */
333   PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));
334 
335   b               = (Mat_SeqSBAIJ *)fact->data;
336   b->singlemalloc = PETSC_FALSE;
337   b->free_a       = PETSC_TRUE;
338   b->free_ij      = PETSC_TRUE;
339 
340   PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a));
341 
342   b->j         = uj;
343   b->i         = ui;
344   b->diag      = udiag;
345   b->free_diag = PETSC_TRUE;
346   b->ilen      = NULL;
347   b->imax      = NULL;
348   b->row       = perm;
349   b->icol      = perm;
350 
351   PetscCall(PetscObjectReference((PetscObject)perm));
352   PetscCall(PetscObjectReference((PetscObject)perm));
353 
354   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
355 
356   PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
357 
358   b->maxnz = b->nz = ui[mbs];
359 
360   fact->info.factor_mallocs   = reallocs;
361   fact->info.fill_ratio_given = fill;
362   if (ai[mbs] != 0) {
363     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
364   } else {
365     fact->info.fill_ratio_needed = 0.0;
366   }
367 #if defined(PETSC_USE_INFO)
368   if (ai[mbs] != 0) {
369     PetscReal af = fact->info.fill_ratio_needed;
370     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
371     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
372     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
373   } else {
374     PetscCall(PetscInfo(A, "Empty matrix\n"));
375   }
376 #endif
377   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
378   PetscFunctionReturn(0);
379 }
380 
381 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) {
382   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
383   Mat_SeqSBAIJ      *b;
384   PetscBool          perm_identity, missing;
385   PetscReal          fill = info->fill;
386   const PetscInt    *rip, *ai, *aj;
387   PetscInt           i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow, d;
388   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
389   PetscInt           nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr;
390   PetscFreeSpaceList free_space = NULL, current_space = NULL;
391   PetscBT            lnkbt;
392 
393   PetscFunctionBegin;
394   PetscCall(MatMissingDiagonal(A, &missing, &d));
395   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
396 
397   /*
398    This code originally uses Modified Sparse Row (MSR) storage
399    (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choise!
400    Then it is rewritten so the factor B takes seqsbaij format. However the associated
401    MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity,
402    thus the original code in MSR format is still used for these cases.
403    The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever
404    MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
405   */
406   if (bs > 1) {
407     PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact, A, perm, info));
408     PetscFunctionReturn(0);
409   }
410 
411   /* check whether perm is the identity mapping */
412   PetscCall(ISIdentity(perm, &perm_identity));
413   PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
414   a->permute = PETSC_FALSE;
415   ai         = a->i;
416   aj         = a->j;
417   PetscCall(ISGetIndices(perm, &rip));
418 
419   /* initialization */
420   PetscCall(PetscMalloc1(mbs + 1, &ui));
421   ui[0] = 0;
422 
423   /* jl: linked list for storing indices of the pivot rows
424      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
425   PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
426   for (i = 0; i < mbs; i++) {
427     jl[i] = mbs;
428     il[i] = 0;
429   }
430 
431   /* create and initialize a linked list for storing column indices of the active row k */
432   nlnk = mbs + 1;
433   PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));
434 
435   /* initial FreeSpace size is fill*(ai[mbs]+1) */
436   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
437   current_space = free_space;
438 
439   for (k = 0; k < mbs; k++) { /* for each active row k */
440     /* initialize lnk by the column indices of row rip[k] of A */
441     nzk   = 0;
442     ncols = ai[rip[k] + 1] - ai[rip[k]];
443     for (j = 0; j < ncols; j++) {
444       i       = *(aj + ai[rip[k]] + j);
445       cols[j] = rip[i];
446     }
447     PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
448     nzk += nlnk;
449 
450     /* update lnk by computing fill-in for each pivot row to be merged in */
451     prow = jl[k]; /* 1st pivot row */
452 
453     while (prow < k) {
454       nextprow = jl[prow];
455       /* merge prow into k-th row */
456       jmin     = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
457       jmax     = ui[prow + 1];
458       ncols    = jmax - jmin;
459       uj_ptr   = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
460       PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
461       nzk += nlnk;
462 
463       /* update il and jl for prow */
464       if (jmin < jmax) {
465         il[prow] = jmin;
466 
467         j        = *uj_ptr;
468         jl[prow] = jl[j];
469         jl[j]    = prow;
470       }
471       prow = nextprow;
472     }
473 
474     /* if free space is not available, make more free space */
475     if (current_space->local_remaining < nzk) {
476       i = mbs - k + 1;                                                            /* num of unfactored rows */
477       i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
478       PetscCall(PetscFreeSpaceGet(i, &current_space));
479       reallocs++;
480     }
481 
482     /* copy data into free space, then initialize lnk */
483     PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));
484 
485     /* add the k-th row into il and jl */
486     if (nzk - 1 > 0) {
487       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
488       jl[k] = jl[i];
489       jl[i] = k;
490       il[k] = ui[k] + 1;
491     }
492     ui_ptr[k] = current_space->array;
493 
494     current_space->array += nzk;
495     current_space->local_used += nzk;
496     current_space->local_remaining -= nzk;
497 
498     ui[k + 1] = ui[k] + nzk;
499   }
500 
501   PetscCall(ISRestoreIndices(perm, &rip));
502   PetscCall(PetscFree4(ui_ptr, il, jl, cols));
503 
504   /* destroy list of free space and other temporary array(s) */
505   PetscCall(PetscMalloc1(ui[mbs] + 1, &uj));
506   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
507   PetscCall(PetscLLDestroy(lnk, lnkbt));
508 
509   /* put together the new matrix in MATSEQSBAIJ format */
510   PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));
511 
512   b               = (Mat_SeqSBAIJ *)fact->data;
513   b->singlemalloc = PETSC_FALSE;
514   b->free_a       = PETSC_TRUE;
515   b->free_ij      = PETSC_TRUE;
516 
517   PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a));
518 
519   b->j    = uj;
520   b->i    = ui;
521   b->diag = NULL;
522   b->ilen = NULL;
523   b->imax = NULL;
524   b->row  = perm;
525 
526   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
527 
528   PetscCall(PetscObjectReference((PetscObject)perm));
529   b->icol = perm;
530   PetscCall(PetscObjectReference((PetscObject)perm));
531   PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
532   b->maxnz = b->nz = ui[mbs];
533 
534   fact->info.factor_mallocs   = reallocs;
535   fact->info.fill_ratio_given = fill;
536   if (ai[mbs] != 0) {
537     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
538   } else {
539     fact->info.fill_ratio_needed = 0.0;
540   }
541 #if defined(PETSC_USE_INFO)
542   if (ai[mbs] != 0) {
543     PetscReal af = fact->info.fill_ratio_needed;
544     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
545     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
546     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
547   } else {
548     PetscCall(PetscInfo(A, "Empty matrix\n"));
549   }
550 #endif
551   PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(fact, perm_identity));
552   PetscFunctionReturn(0);
553 }
554 
555 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C, Mat A, const MatFactorInfo *info) {
556   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
557   IS              perm = b->row;
558   const PetscInt *ai, *aj, *perm_ptr, mbs = a->mbs, *bi = b->i, *bj = b->j;
559   PetscInt        i, j;
560   PetscInt       *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
561   PetscInt        bs = A->rmap->bs, bs2 = a->bs2;
562   MatScalar      *ba = b->a, *aa, *ap, *dk, *uik;
563   MatScalar      *u, *diag, *rtmp, *rtmp_ptr;
564   MatScalar      *work;
565   PetscInt       *pivots;
566   PetscBool       allowzeropivot, zeropivotdetected;
567 
568   PetscFunctionBegin;
569   /* initialization */
570   PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
571   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
572   allowzeropivot = PetscNot(A->erroriffailure);
573 
574   il[0] = 0;
575   for (i = 0; i < mbs; i++) jl[i] = mbs;
576 
577   PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
578   PetscCall(PetscMalloc1(bs, &pivots));
579 
580   PetscCall(ISGetIndices(perm, &perm_ptr));
581 
582   /* check permutation */
583   if (!a->permute) {
584     ai = a->i;
585     aj = a->j;
586     aa = a->a;
587   } else {
588     ai = a->inew;
589     aj = a->jnew;
590     PetscCall(PetscMalloc1(bs2 * ai[mbs], &aa));
591     PetscCall(PetscArraycpy(aa, a->a, bs2 * ai[mbs]));
592     PetscCall(PetscMalloc1(ai[mbs], &a2anew));
593     PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));
594 
595     for (i = 0; i < mbs; i++) {
596       jmin = ai[i];
597       jmax = ai[i + 1];
598       for (j = jmin; j < jmax; j++) {
599         while (a2anew[j] != j) {
600           k         = a2anew[j];
601           a2anew[j] = a2anew[k];
602           a2anew[k] = k;
603           for (k1 = 0; k1 < bs2; k1++) {
604             dk[k1]           = aa[k * bs2 + k1];
605             aa[k * bs2 + k1] = aa[j * bs2 + k1];
606             aa[j * bs2 + k1] = dk[k1];
607           }
608         }
609         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
610         if (i > aj[j]) {
611           ap = aa + j * bs2;                       /* ptr to the beginning of j-th block of aa */
612           for (k = 0; k < bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
613           for (k = 0; k < bs; k++) {               /* j-th block of aa <- dk^T */
614             for (k1 = 0; k1 < bs; k1++) *ap++ = dk[k + bs * k1];
615           }
616         }
617       }
618     }
619     PetscCall(PetscFree(a2anew));
620   }
621 
622   /* for each row k */
623   for (k = 0; k < mbs; k++) {
624     /*initialize k-th row with elements nonzero in row perm(k) of A */
625     jmin = ai[perm_ptr[k]];
626     jmax = ai[perm_ptr[k] + 1];
627 
628     ap = aa + jmin * bs2;
629     for (j = jmin; j < jmax; j++) {
630       vj       = perm_ptr[aj[j]]; /* block col. index */
631       rtmp_ptr = rtmp + vj * bs2;
632       for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
633     }
634 
635     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
636     PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
637     i = jl[k]; /* first row to be added to k_th row  */
638 
639     while (i < k) {
640       nexti = jl[i]; /* next row to be added to k_th row */
641 
642       /* compute multiplier */
643       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
644 
645       /* uik = -inv(Di)*U_bar(i,k) */
646       diag = ba + i * bs2;
647       u    = ba + ili * bs2;
648       PetscCall(PetscArrayzero(uik, bs2));
649       PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);
650 
651       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
652       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
653       PetscCall(PetscLogFlops(4.0 * bs * bs2));
654 
655       /* update -U(i,k) */
656       PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));
657 
658       /* add multiple of row i to k-th row ... */
659       jmin = ili + 1;
660       jmax = bi[i + 1];
661       if (jmin < jmax) {
662         for (j = jmin; j < jmax; j++) {
663           /* rtmp += -U(i,k)^T * U_bar(i,j) */
664           rtmp_ptr = rtmp + bj[j] * bs2;
665           u        = ba + j * bs2;
666           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
667         }
668         PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));
669 
670         /* ... add i to row list for next nonzero entry */
671         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
672         j     = bj[jmin];
673         jl[i] = jl[j];
674         jl[j] = i; /* update jl */
675       }
676       i = nexti;
677     }
678 
679     /* save nonzero entries in k-th row of U ... */
680 
681     /* invert diagonal block */
682     diag = ba + k * bs2;
683     PetscCall(PetscArraycpy(diag, dk, bs2));
684 
685     PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
686     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
687 
688     jmin = bi[k];
689     jmax = bi[k + 1];
690     if (jmin < jmax) {
691       for (j = jmin; j < jmax; j++) {
692         vj       = bj[j]; /* block col. index of U */
693         u        = ba + j * bs2;
694         rtmp_ptr = rtmp + vj * bs2;
695         for (k1 = 0; k1 < bs2; k1++) {
696           *u++        = *rtmp_ptr;
697           *rtmp_ptr++ = 0.0;
698         }
699       }
700 
701       /* ... add k to row list for first nonzero entry in k-th row */
702       il[k] = jmin;
703       i     = bj[jmin];
704       jl[k] = jl[i];
705       jl[i] = k;
706     }
707   }
708 
709   PetscCall(PetscFree(rtmp));
710   PetscCall(PetscFree2(il, jl));
711   PetscCall(PetscFree3(dk, uik, work));
712   PetscCall(PetscFree(pivots));
713   if (a->permute) PetscCall(PetscFree(aa));
714 
715   PetscCall(ISRestoreIndices(perm, &perm_ptr));
716 
717   C->ops->solve          = MatSolve_SeqSBAIJ_N_inplace;
718   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace;
719   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_inplace;
720   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_inplace;
721 
722   C->assembled    = PETSC_TRUE;
723   C->preallocated = PETSC_TRUE;
724 
725   PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
726   PetscFunctionReturn(0);
727 }
728 
729 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info) {
730   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
731   PetscInt      i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
732   PetscInt     *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
733   PetscInt      bs = A->rmap->bs, bs2 = a->bs2;
734   MatScalar    *ba = b->a, *aa, *ap, *dk, *uik;
735   MatScalar    *u, *diag, *rtmp, *rtmp_ptr;
736   MatScalar    *work;
737   PetscInt     *pivots;
738   PetscBool     allowzeropivot, zeropivotdetected;
739 
740   PetscFunctionBegin;
741   PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
742   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
743   il[0] = 0;
744   for (i = 0; i < mbs; i++) jl[i] = mbs;
745 
746   PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
747   PetscCall(PetscMalloc1(bs, &pivots));
748   allowzeropivot = PetscNot(A->erroriffailure);
749 
750   ai = a->i;
751   aj = a->j;
752   aa = a->a;
753 
754   /* for each row k */
755   for (k = 0; k < mbs; k++) {
756     /*initialize k-th row with elements nonzero in row k of A */
757     jmin = ai[k];
758     jmax = ai[k + 1];
759     ap   = aa + jmin * bs2;
760     for (j = jmin; j < jmax; j++) {
761       vj       = aj[j]; /* block col. index */
762       rtmp_ptr = rtmp + vj * bs2;
763       for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
764     }
765 
766     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
767     PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
768     i = jl[k]; /* first row to be added to k_th row  */
769 
770     while (i < k) {
771       nexti = jl[i]; /* next row to be added to k_th row */
772 
773       /* compute multiplier */
774       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
775 
776       /* uik = -inv(Di)*U_bar(i,k) */
777       diag = ba + i * bs2;
778       u    = ba + ili * bs2;
779       PetscCall(PetscArrayzero(uik, bs2));
780       PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);
781 
782       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
783       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
784       PetscCall(PetscLogFlops(2.0 * bs * bs2));
785 
786       /* update -U(i,k) */
787       PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));
788 
789       /* add multiple of row i to k-th row ... */
790       jmin = ili + 1;
791       jmax = bi[i + 1];
792       if (jmin < jmax) {
793         for (j = jmin; j < jmax; j++) {
794           /* rtmp += -U(i,k)^T * U_bar(i,j) */
795           rtmp_ptr = rtmp + bj[j] * bs2;
796           u        = ba + j * bs2;
797           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
798         }
799         PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));
800 
801         /* ... add i to row list for next nonzero entry */
802         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
803         j     = bj[jmin];
804         jl[i] = jl[j];
805         jl[j] = i; /* update jl */
806       }
807       i = nexti;
808     }
809 
810     /* save nonzero entries in k-th row of U ... */
811 
812     /* invert diagonal block */
813     diag = ba + k * bs2;
814     PetscCall(PetscArraycpy(diag, dk, bs2));
815 
816     PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
817     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
818 
819     jmin = bi[k];
820     jmax = bi[k + 1];
821     if (jmin < jmax) {
822       for (j = jmin; j < jmax; j++) {
823         vj       = bj[j]; /* block col. index of U */
824         u        = ba + j * bs2;
825         rtmp_ptr = rtmp + vj * bs2;
826         for (k1 = 0; k1 < bs2; k1++) {
827           *u++        = *rtmp_ptr;
828           *rtmp_ptr++ = 0.0;
829         }
830       }
831 
832       /* ... add k to row list for first nonzero entry in k-th row */
833       il[k] = jmin;
834       i     = bj[jmin];
835       jl[k] = jl[i];
836       jl[i] = k;
837     }
838   }
839 
840   PetscCall(PetscFree(rtmp));
841   PetscCall(PetscFree2(il, jl));
842   PetscCall(PetscFree3(dk, uik, work));
843   PetscCall(PetscFree(pivots));
844 
845   C->ops->solve          = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
846   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
847   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
848   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
849   C->assembled           = PETSC_TRUE;
850   C->preallocated        = PETSC_TRUE;
851 
852   PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
853   PetscFunctionReturn(0);
854 }
855 
856 /*
857     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
858     Version for blocks 2 by 2.
859 */
860 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C, Mat A, const MatFactorInfo *info) {
861   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
862   IS              perm = b->row;
863   const PetscInt *ai, *aj, *perm_ptr;
864   PetscInt        i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
865   PetscInt       *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
866   MatScalar      *ba = b->a, *aa, *ap;
867   MatScalar      *u, *diag, *rtmp, *rtmp_ptr, dk[4], uik[4];
868   PetscReal       shift = info->shiftamount;
869   PetscBool       allowzeropivot, zeropivotdetected;
870 
871   PetscFunctionBegin;
872   allowzeropivot = PetscNot(A->erroriffailure);
873 
874   /* initialization */
875   /* il and jl record the first nonzero element in each row of the accessing
876      window U(0:k, k:mbs-1).
877      jl:    list of rows to be added to uneliminated rows
878             i>= k: jl(i) is the first row to be added to row i
879             i<  k: jl(i) is the row following row i in some list of rows
880             jl(i) = mbs indicates the end of a list
881      il(i): points to the first nonzero element in columns k,...,mbs-1 of
882             row i of U */
883   PetscCall(PetscCalloc1(4 * mbs, &rtmp));
884   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
885   il[0] = 0;
886   for (i = 0; i < mbs; i++) jl[i] = mbs;
887 
888   PetscCall(ISGetIndices(perm, &perm_ptr));
889 
890   /* check permutation */
891   if (!a->permute) {
892     ai = a->i;
893     aj = a->j;
894     aa = a->a;
895   } else {
896     ai = a->inew;
897     aj = a->jnew;
898     PetscCall(PetscMalloc1(4 * ai[mbs], &aa));
899     PetscCall(PetscArraycpy(aa, a->a, 4 * ai[mbs]));
900     PetscCall(PetscMalloc1(ai[mbs], &a2anew));
901     PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));
902 
903     for (i = 0; i < mbs; i++) {
904       jmin = ai[i];
905       jmax = ai[i + 1];
906       for (j = jmin; j < jmax; j++) {
907         while (a2anew[j] != j) {
908           k         = a2anew[j];
909           a2anew[j] = a2anew[k];
910           a2anew[k] = k;
911           for (k1 = 0; k1 < 4; k1++) {
912             dk[k1]         = aa[k * 4 + k1];
913             aa[k * 4 + k1] = aa[j * 4 + k1];
914             aa[j * 4 + k1] = dk[k1];
915           }
916         }
917         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
918         if (i > aj[j]) {
919           ap    = aa + j * 4; /* ptr to the beginning of the block */
920           dk[1] = ap[1];      /* swap ap[1] and ap[2] */
921           ap[1] = ap[2];
922           ap[2] = dk[1];
923         }
924       }
925     }
926     PetscCall(PetscFree(a2anew));
927   }
928 
929   /* for each row k */
930   for (k = 0; k < mbs; k++) {
931     /*initialize k-th row with elements nonzero in row perm(k) of A */
932     jmin = ai[perm_ptr[k]];
933     jmax = ai[perm_ptr[k] + 1];
934     ap   = aa + jmin * 4;
935     for (j = jmin; j < jmax; j++) {
936       vj       = perm_ptr[aj[j]]; /* block col. index */
937       rtmp_ptr = rtmp + vj * 4;
938       for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
939     }
940 
941     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
942     PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
943     i = jl[k]; /* first row to be added to k_th row  */
944 
945     while (i < k) {
946       nexti = jl[i]; /* next row to be added to k_th row */
947 
948       /* compute multiplier */
949       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
950 
951       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
952       diag   = ba + i * 4;
953       u      = ba + ili * 4;
954       uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
955       uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
956       uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
957       uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);
958 
959       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
960       dk[0] += uik[0] * u[0] + uik[1] * u[1];
961       dk[1] += uik[2] * u[0] + uik[3] * u[1];
962       dk[2] += uik[0] * u[2] + uik[1] * u[3];
963       dk[3] += uik[2] * u[2] + uik[3] * u[3];
964 
965       PetscCall(PetscLogFlops(16.0 * 2.0));
966 
967       /* update -U(i,k): ba[ili] = uik */
968       PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));
969 
970       /* add multiple of row i to k-th row ... */
971       jmin = ili + 1;
972       jmax = bi[i + 1];
973       if (jmin < jmax) {
974         for (j = jmin; j < jmax; j++) {
975           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
976           rtmp_ptr = rtmp + bj[j] * 4;
977           u        = ba + j * 4;
978           rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
979           rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
980           rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
981           rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
982         }
983         PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));
984 
985         /* ... add i to row list for next nonzero entry */
986         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
987         j     = bj[jmin];
988         jl[i] = jl[j];
989         jl[j] = i; /* update jl */
990       }
991       i = nexti;
992     }
993 
994     /* save nonzero entries in k-th row of U ... */
995 
996     /* invert diagonal block */
997     diag = ba + k * 4;
998     PetscCall(PetscArraycpy(diag, dk, 4));
999     PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1000     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1001 
1002     jmin = bi[k];
1003     jmax = bi[k + 1];
1004     if (jmin < jmax) {
1005       for (j = jmin; j < jmax; j++) {
1006         vj       = bj[j]; /* block col. index of U */
1007         u        = ba + j * 4;
1008         rtmp_ptr = rtmp + vj * 4;
1009         for (k1 = 0; k1 < 4; k1++) {
1010           *u++        = *rtmp_ptr;
1011           *rtmp_ptr++ = 0.0;
1012         }
1013       }
1014 
1015       /* ... add k to row list for first nonzero entry in k-th row */
1016       il[k] = jmin;
1017       i     = bj[jmin];
1018       jl[k] = jl[i];
1019       jl[i] = k;
1020     }
1021   }
1022 
1023   PetscCall(PetscFree(rtmp));
1024   PetscCall(PetscFree2(il, jl));
1025   if (a->permute) PetscCall(PetscFree(aa));
1026   PetscCall(ISRestoreIndices(perm, &perm_ptr));
1027 
1028   C->ops->solve          = MatSolve_SeqSBAIJ_2_inplace;
1029   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace;
1030   C->assembled           = PETSC_TRUE;
1031   C->preallocated        = PETSC_TRUE;
1032 
1033   PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 /*
1038       Version for when blocks are 2 by 2 Using natural ordering
1039 */
1040 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info) {
1041   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1042   PetscInt      i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
1043   PetscInt     *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
1044   MatScalar    *ba = b->a, *aa, *ap, dk[8], uik[8];
1045   MatScalar    *u, *diag, *rtmp, *rtmp_ptr;
1046   PetscReal     shift = info->shiftamount;
1047   PetscBool     allowzeropivot, zeropivotdetected;
1048 
1049   PetscFunctionBegin;
1050   allowzeropivot = PetscNot(A->erroriffailure);
1051 
1052   /* initialization */
1053   /* il and jl record the first nonzero element in each row of the accessing
1054      window U(0:k, k:mbs-1).
1055      jl:    list of rows to be added to uneliminated rows
1056             i>= k: jl(i) is the first row to be added to row i
1057             i<  k: jl(i) is the row following row i in some list of rows
1058             jl(i) = mbs indicates the end of a list
1059      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1060             row i of U */
1061   PetscCall(PetscCalloc1(4 * mbs, &rtmp));
1062   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
1063   il[0] = 0;
1064   for (i = 0; i < mbs; i++) jl[i] = mbs;
1065 
1066   ai = a->i;
1067   aj = a->j;
1068   aa = a->a;
1069 
1070   /* for each row k */
1071   for (k = 0; k < mbs; k++) {
1072     /*initialize k-th row with elements nonzero in row k of A */
1073     jmin = ai[k];
1074     jmax = ai[k + 1];
1075     ap   = aa + jmin * 4;
1076     for (j = jmin; j < jmax; j++) {
1077       vj       = aj[j]; /* block col. index */
1078       rtmp_ptr = rtmp + vj * 4;
1079       for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
1080     }
1081 
1082     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1083     PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
1084     i = jl[k]; /* first row to be added to k_th row  */
1085 
1086     while (i < k) {
1087       nexti = jl[i]; /* next row to be added to k_th row */
1088 
1089       /* compute multiplier */
1090       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1091 
1092       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1093       diag   = ba + i * 4;
1094       u      = ba + ili * 4;
1095       uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
1096       uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
1097       uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
1098       uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);
1099 
1100       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1101       dk[0] += uik[0] * u[0] + uik[1] * u[1];
1102       dk[1] += uik[2] * u[0] + uik[3] * u[1];
1103       dk[2] += uik[0] * u[2] + uik[1] * u[3];
1104       dk[3] += uik[2] * u[2] + uik[3] * u[3];
1105 
1106       PetscCall(PetscLogFlops(16.0 * 2.0));
1107 
1108       /* update -U(i,k): ba[ili] = uik */
1109       PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));
1110 
1111       /* add multiple of row i to k-th row ... */
1112       jmin = ili + 1;
1113       jmax = bi[i + 1];
1114       if (jmin < jmax) {
1115         for (j = jmin; j < jmax; j++) {
1116           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1117           rtmp_ptr = rtmp + bj[j] * 4;
1118           u        = ba + j * 4;
1119           rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
1120           rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
1121           rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
1122           rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
1123         }
1124         PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));
1125 
1126         /* ... add i to row list for next nonzero entry */
1127         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1128         j     = bj[jmin];
1129         jl[i] = jl[j];
1130         jl[j] = i; /* update jl */
1131       }
1132       i = nexti;
1133     }
1134 
1135     /* save nonzero entries in k-th row of U ... */
1136 
1137     /* invert diagonal block */
1138     diag = ba + k * 4;
1139     PetscCall(PetscArraycpy(diag, dk, 4));
1140     PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1141     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1142 
1143     jmin = bi[k];
1144     jmax = bi[k + 1];
1145     if (jmin < jmax) {
1146       for (j = jmin; j < jmax; j++) {
1147         vj       = bj[j]; /* block col. index of U */
1148         u        = ba + j * 4;
1149         rtmp_ptr = rtmp + vj * 4;
1150         for (k1 = 0; k1 < 4; k1++) {
1151           *u++        = *rtmp_ptr;
1152           *rtmp_ptr++ = 0.0;
1153         }
1154       }
1155 
1156       /* ... add k to row list for first nonzero entry in k-th row */
1157       il[k] = jmin;
1158       i     = bj[jmin];
1159       jl[k] = jl[i];
1160       jl[i] = k;
1161     }
1162   }
1163 
1164   PetscCall(PetscFree(rtmp));
1165   PetscCall(PetscFree2(il, jl));
1166 
1167   C->ops->solve          = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1168   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1169   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1170   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1171   C->assembled           = PETSC_TRUE;
1172   C->preallocated        = PETSC_TRUE;
1173 
1174   PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 /*
1179     Numeric U^T*D*U factorization for SBAIJ format.
1180     Version for blocks are 1 by 1.
1181 */
1182 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info) {
1183   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1184   IS              ip = b->row;
1185   const PetscInt *ai, *aj, *rip;
1186   PetscInt       *a2anew, i, j, mbs = a->mbs, *bi = b->i, *bj = b->j, *bcol;
1187   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
1188   MatScalar      *rtmp, *ba = b->a, *bval, *aa, dk, uikdi;
1189   PetscReal       rs;
1190   FactorShiftCtx  sctx;
1191 
1192   PetscFunctionBegin;
1193   /* MatPivotSetUp(): initialize shift context sctx */
1194   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1195 
1196   PetscCall(ISGetIndices(ip, &rip));
1197   if (!a->permute) {
1198     ai = a->i;
1199     aj = a->j;
1200     aa = a->a;
1201   } else {
1202     ai = a->inew;
1203     aj = a->jnew;
1204     nz = ai[mbs];
1205     PetscCall(PetscMalloc1(nz, &aa));
1206     a2anew = a->a2anew;
1207     bval   = a->a;
1208     for (j = 0; j < nz; j++) aa[a2anew[j]] = *(bval++);
1209   }
1210 
1211   /* initialization */
1212   /* il and jl record the first nonzero element in each row of the accessing
1213      window U(0:k, k:mbs-1).
1214      jl:    list of rows to be added to uneliminated rows
1215             i>= k: jl(i) is the first row to be added to row i
1216             i<  k: jl(i) is the row following row i in some list of rows
1217             jl(i) = mbs indicates the end of a list
1218      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1219             row i of U */
1220   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
1221 
1222   do {
1223     sctx.newshift = PETSC_FALSE;
1224     il[0]         = 0;
1225     for (i = 0; i < mbs; i++) {
1226       rtmp[i] = 0.0;
1227       jl[i]   = mbs;
1228     }
1229 
1230     for (k = 0; k < mbs; k++) {
1231       /*initialize k-th row by the perm[k]-th row of A */
1232       jmin = ai[rip[k]];
1233       jmax = ai[rip[k] + 1];
1234       bval = ba + bi[k];
1235       for (j = jmin; j < jmax; j++) {
1236         col       = rip[aj[j]];
1237         rtmp[col] = aa[j];
1238         *bval++   = 0.0; /* for in-place factorization */
1239       }
1240 
1241       /* shift the diagonal of the matrix */
1242       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1243 
1244       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1245       dk = rtmp[k];
1246       i  = jl[k]; /* first row to be added to k_th row  */
1247 
1248       while (i < k) {
1249         nexti = jl[i]; /* next row to be added to k_th row */
1250 
1251         /* compute multiplier, update diag(k) and U(i,k) */
1252         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
1253         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
1254         dk += uikdi * ba[ili];
1255         ba[ili] = uikdi; /* -U(i,k) */
1256 
1257         /* add multiple of row i to k-th row */
1258         jmin = ili + 1;
1259         jmax = bi[i + 1];
1260         if (jmin < jmax) {
1261           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1262           PetscCall(PetscLogFlops(2.0 * (jmax - jmin)));
1263 
1264           /* update il and jl for row i */
1265           il[i] = jmin;
1266           j     = bj[jmin];
1267           jl[i] = jl[j];
1268           jl[j] = i;
1269         }
1270         i = nexti;
1271       }
1272 
1273       /* shift the diagonals when zero pivot is detected */
1274       /* compute rs=sum of abs(off-diagonal) */
1275       rs   = 0.0;
1276       jmin = bi[k] + 1;
1277       nz   = bi[k + 1] - jmin;
1278       if (nz) {
1279         bcol = bj + jmin;
1280         while (nz--) {
1281           rs += PetscAbsScalar(rtmp[*bcol]);
1282           bcol++;
1283         }
1284       }
1285 
1286       sctx.rs = rs;
1287       sctx.pv = dk;
1288       PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1289       if (sctx.newshift) break; /* sctx.shift_amount is updated */
1290       dk = sctx.pv;
1291 
1292       /* copy data into U(k,:) */
1293       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
1294       jmin      = bi[k] + 1;
1295       jmax      = bi[k + 1];
1296       if (jmin < jmax) {
1297         for (j = jmin; j < jmax; j++) {
1298           col       = bj[j];
1299           ba[j]     = rtmp[col];
1300           rtmp[col] = 0.0;
1301         }
1302         /* add the k-th row into il and jl */
1303         il[k] = jmin;
1304         i     = bj[jmin];
1305         jl[k] = jl[i];
1306         jl[i] = k;
1307       }
1308     }
1309   } while (sctx.newshift);
1310   PetscCall(PetscFree3(rtmp, il, jl));
1311   if (a->permute) PetscCall(PetscFree(aa));
1312 
1313   PetscCall(ISRestoreIndices(ip, &rip));
1314 
1315   C->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
1316   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1317   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
1318   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
1319   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
1320   C->assembled           = PETSC_TRUE;
1321   C->preallocated        = PETSC_TRUE;
1322 
1323   PetscCall(PetscLogFlops(C->rmap->N));
1324   if (sctx.nshift) {
1325     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1326       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1327     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1328       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1329     }
1330   }
1331   PetscFunctionReturn(0);
1332 }
1333 
1334 /*
1335   Version for when blocks are 1 by 1 Using natural ordering under new datastructure
1336   Modified from MatCholeskyFactorNumeric_SeqAIJ()
1337 */
1338 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info) {
1339   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ *)A->data;
1340   Mat_SeqSBAIJ  *b = (Mat_SeqSBAIJ *)B->data;
1341   PetscInt       i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1342   PetscInt      *ai = a->i, *aj = a->j, *ajtmp;
1343   PetscInt       k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1344   MatScalar     *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
1345   FactorShiftCtx sctx;
1346   PetscReal      rs;
1347   MatScalar      d, *v;
1348 
1349   PetscFunctionBegin;
1350   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
1351 
1352   /* MatPivotSetUp(): initialize shift context sctx */
1353   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1354 
1355   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1356     sctx.shift_top = info->zeropivot;
1357 
1358     PetscCall(PetscArrayzero(rtmp, mbs));
1359 
1360     for (i = 0; i < mbs; i++) {
1361       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1362       d = (aa)[a->diag[i]];
1363       rtmp[i] += -PetscRealPart(d); /* diagonal entry */
1364       ajtmp = aj + ai[i] + 1;       /* exclude diagonal */
1365       v     = aa + ai[i] + 1;
1366       nz    = ai[i + 1] - ai[i] - 1;
1367       for (j = 0; j < nz; j++) {
1368         rtmp[i] += PetscAbsScalar(v[j]);
1369         rtmp[ajtmp[j]] += PetscAbsScalar(v[j]);
1370       }
1371       if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]);
1372     }
1373     sctx.shift_top *= 1.1;
1374     sctx.nshift_max = 5;
1375     sctx.shift_lo   = 0.;
1376     sctx.shift_hi   = 1.;
1377   }
1378 
1379   /* allocate working arrays
1380      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1381      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1382   */
1383   do {
1384     sctx.newshift = PETSC_FALSE;
1385 
1386     for (i = 0; i < mbs; i++) c2r[i] = mbs;
1387     if (mbs) il[0] = 0;
1388 
1389     for (k = 0; k < mbs; k++) {
1390       /* zero rtmp */
1391       nz    = bi[k + 1] - bi[k];
1392       bjtmp = bj + bi[k];
1393       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
1394 
1395       /* load in initial unfactored row */
1396       bval = ba + bi[k];
1397       jmin = ai[k];
1398       jmax = ai[k + 1];
1399       for (j = jmin; j < jmax; j++) {
1400         col       = aj[j];
1401         rtmp[col] = aa[j];
1402         *bval++   = 0.0; /* for in-place factorization */
1403       }
1404       /* shift the diagonal of the matrix: ZeropivotApply() */
1405       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
1406 
1407       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1408       dk = rtmp[k];
1409       i  = c2r[k]; /* first row to be added to k_th row  */
1410 
1411       while (i < k) {
1412         nexti = c2r[i]; /* next row to be added to k_th row */
1413 
1414         /* compute multiplier, update diag(k) and U(i,k) */
1415         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
1416         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
1417         dk += uikdi * ba[ili];           /* update diag[k] */
1418         ba[ili] = uikdi;                 /* -U(i,k) */
1419 
1420         /* add multiple of row i to k-th row */
1421         jmin = ili + 1;
1422         jmax = bi[i + 1];
1423         if (jmin < jmax) {
1424           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1425           /* update il and c2r for row i */
1426           il[i]  = jmin;
1427           j      = bj[jmin];
1428           c2r[i] = c2r[j];
1429           c2r[j] = i;
1430         }
1431         i = nexti;
1432       }
1433 
1434       /* copy data into U(k,:) */
1435       rs   = 0.0;
1436       jmin = bi[k];
1437       jmax = bi[k + 1] - 1;
1438       if (jmin < jmax) {
1439         for (j = jmin; j < jmax; j++) {
1440           col   = bj[j];
1441           ba[j] = rtmp[col];
1442           rs += PetscAbsScalar(ba[j]);
1443         }
1444         /* add the k-th row into il and c2r */
1445         il[k]  = jmin;
1446         i      = bj[jmin];
1447         c2r[k] = c2r[i];
1448         c2r[i] = k;
1449       }
1450 
1451       sctx.rs = rs;
1452       sctx.pv = dk;
1453       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
1454       if (sctx.newshift) break;
1455       dk = sctx.pv;
1456 
1457       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
1458     }
1459   } while (sctx.newshift);
1460 
1461   PetscCall(PetscFree3(rtmp, il, c2r));
1462 
1463   B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1464   B->ops->solves         = MatSolves_SeqSBAIJ_1;
1465   B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1466   B->ops->matsolve       = MatMatSolve_SeqSBAIJ_1_NaturalOrdering;
1467   B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1468   B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1469 
1470   B->assembled    = PETSC_TRUE;
1471   B->preallocated = PETSC_TRUE;
1472 
1473   PetscCall(PetscLogFlops(B->rmap->n));
1474 
1475   /* MatPivotView() */
1476   if (sctx.nshift) {
1477     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1478       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
1479     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1480       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1481     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1482       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
1483     }
1484   }
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info) {
1489   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1490   PetscInt       i, j, mbs = a->mbs;
1491   PetscInt      *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
1492   PetscInt       k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz;
1493   MatScalar     *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval;
1494   PetscReal      rs;
1495   FactorShiftCtx sctx;
1496 
1497   PetscFunctionBegin;
1498   /* MatPivotSetUp(): initialize shift context sctx */
1499   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1500 
1501   /* initialization */
1502   /* il and jl record the first nonzero element in each row of the accessing
1503      window U(0:k, k:mbs-1).
1504      jl:    list of rows to be added to uneliminated rows
1505             i>= k: jl(i) is the first row to be added to row i
1506             i<  k: jl(i) is the row following row i in some list of rows
1507             jl(i) = mbs indicates the end of a list
1508      il(i): points to the first nonzero element in U(i,k:mbs-1)
1509   */
1510   PetscCall(PetscMalloc1(mbs, &rtmp));
1511   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
1512 
1513   do {
1514     sctx.newshift = PETSC_FALSE;
1515     il[0]         = 0;
1516     for (i = 0; i < mbs; i++) {
1517       rtmp[i] = 0.0;
1518       jl[i]   = mbs;
1519     }
1520 
1521     for (k = 0; k < mbs; k++) {
1522       /*initialize k-th row with elements nonzero in row perm(k) of A */
1523       nz   = ai[k + 1] - ai[k];
1524       acol = aj + ai[k];
1525       aval = aa + ai[k];
1526       bval = ba + bi[k];
1527       while (nz--) {
1528         rtmp[*acol++] = *aval++;
1529         *bval++       = 0.0; /* for in-place factorization */
1530       }
1531 
1532       /* shift the diagonal of the matrix */
1533       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1534 
1535       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1536       dk = rtmp[k];
1537       i  = jl[k]; /* first row to be added to k_th row  */
1538 
1539       while (i < k) {
1540         nexti = jl[i]; /* next row to be added to k_th row */
1541         /* compute multiplier, update D(k) and U(i,k) */
1542         ili   = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1543         uikdi = -ba[ili] * ba[bi[i]];
1544         dk += uikdi * ba[ili];
1545         ba[ili] = uikdi; /* -U(i,k) */
1546 
1547         /* add multiple of row i to k-th row ... */
1548         jmin = ili + 1;
1549         nz   = bi[i + 1] - jmin;
1550         if (nz > 0) {
1551           bcol = bj + jmin;
1552           bval = ba + jmin;
1553           PetscCall(PetscLogFlops(2.0 * nz));
1554           while (nz--) rtmp[*bcol++] += uikdi * (*bval++);
1555 
1556           /* update il and jl for i-th row */
1557           il[i] = jmin;
1558           j     = bj[jmin];
1559           jl[i] = jl[j];
1560           jl[j] = i;
1561         }
1562         i = nexti;
1563       }
1564 
1565       /* shift the diagonals when zero pivot is detected */
1566       /* compute rs=sum of abs(off-diagonal) */
1567       rs   = 0.0;
1568       jmin = bi[k] + 1;
1569       nz   = bi[k + 1] - jmin;
1570       if (nz) {
1571         bcol = bj + jmin;
1572         while (nz--) {
1573           rs += PetscAbsScalar(rtmp[*bcol]);
1574           bcol++;
1575         }
1576       }
1577 
1578       sctx.rs = rs;
1579       sctx.pv = dk;
1580       PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1581       if (sctx.newshift) break; /* sctx.shift_amount is updated */
1582       dk = sctx.pv;
1583 
1584       /* copy data into U(k,:) */
1585       ba[bi[k]] = 1.0 / dk;
1586       jmin      = bi[k] + 1;
1587       nz        = bi[k + 1] - jmin;
1588       if (nz) {
1589         bcol = bj + jmin;
1590         bval = ba + jmin;
1591         while (nz--) {
1592           *bval++       = rtmp[*bcol];
1593           rtmp[*bcol++] = 0.0;
1594         }
1595         /* add k-th row into il and jl */
1596         il[k] = jmin;
1597         i     = bj[jmin];
1598         jl[k] = jl[i];
1599         jl[i] = k;
1600       }
1601     } /* end of for (k = 0; k<mbs; k++) */
1602   } while (sctx.newshift);
1603   PetscCall(PetscFree(rtmp));
1604   PetscCall(PetscFree2(il, jl));
1605 
1606   C->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1607   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1608   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1609   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1610   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1611 
1612   C->assembled    = PETSC_TRUE;
1613   C->preallocated = PETSC_TRUE;
1614 
1615   PetscCall(PetscLogFlops(C->rmap->N));
1616   if (sctx.nshift) {
1617     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1618       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1619     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1620       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1621     }
1622   }
1623   PetscFunctionReturn(0);
1624 }
1625 
1626 PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A, IS perm, const MatFactorInfo *info) {
1627   Mat C;
1628 
1629   PetscFunctionBegin;
1630   PetscCall(MatGetFactor(A, "petsc", MAT_FACTOR_CHOLESKY, &C));
1631   PetscCall(MatCholeskyFactorSymbolic(C, A, perm, info));
1632   PetscCall(MatCholeskyFactorNumeric(C, A, info));
1633 
1634   A->ops->solve          = C->ops->solve;
1635   A->ops->solvetranspose = C->ops->solvetranspose;
1636 
1637   PetscCall(MatHeaderMerge(A, &C));
1638   PetscFunctionReturn(0);
1639 }
1640