xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact.c (revision d2522c19e8fa9bca20aaca277941d9a63e71db6a)
1 
2 #include <../src/mat/impls/baij/seq/baij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petsc/private/kernels/blockinvert.h>
5 #include <petscis.h>
6 
7 PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) {
8   Mat_SeqSBAIJ *fact = (Mat_SeqSBAIJ *)F->data;
9   MatScalar    *dd   = fact->a;
10   PetscInt      mbs = fact->mbs, bs = F->rmap->bs, i, nneg_tmp, npos_tmp, *fi = fact->diag;
11 
12   PetscFunctionBegin;
13   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for bs: %" PetscInt_FMT " >1 yet", bs);
14 
15   nneg_tmp = 0;
16   npos_tmp = 0;
17   for (i = 0; i < mbs; i++) {
18     if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++;
19     else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++;
20     fi++;
21   }
22   if (nneg) *nneg = nneg_tmp;
23   if (npos) *npos = npos_tmp;
24   if (nzero) *nzero = mbs - nneg_tmp - npos_tmp;
25   PetscFunctionReturn(0);
26 }
27 
28 /*
29   Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
30   Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad.
31 */
32 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F, Mat A, IS perm, const MatFactorInfo *info) {
33   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b;
34   const PetscInt *rip, *ai, *aj;
35   PetscInt        i, mbs = a->mbs, *jutmp, bs = A->rmap->bs, bs2 = a->bs2;
36   PetscInt        m, reallocs = 0, prow;
37   PetscInt       *jl, *q, jmin, jmax, juidx, nzk, qm, *iu, *ju, k, j, vj, umax, maxadd;
38   PetscReal       f = info->fill;
39   PetscBool       perm_identity;
40 
41   PetscFunctionBegin;
42   /* check whether perm is the identity mapping */
43   PetscCall(ISIdentity(perm, &perm_identity));
44   PetscCall(ISGetIndices(perm, &rip));
45 
46   if (perm_identity) { /* without permutation */
47     a->permute = PETSC_FALSE;
48 
49     ai = a->i;
50     aj = a->j;
51   } else { /* non-trivial permutation */
52     a->permute = PETSC_TRUE;
53 
54     PetscCall(MatReorderingSeqSBAIJ(A, perm));
55 
56     ai = a->inew;
57     aj = a->jnew;
58   }
59 
60   /* initialization */
61   PetscCall(PetscMalloc1(mbs + 1, &iu));
62   umax = (PetscInt)(f * ai[mbs] + 1);
63   umax += mbs + 1;
64   PetscCall(PetscMalloc1(umax, &ju));
65   iu[0] = mbs + 1;
66   juidx = mbs + 1; /* index for ju */
67   /* jl linked list for pivot row -- linked list for col index */
68   PetscCall(PetscMalloc2(mbs, &jl, mbs, &q));
69   for (i = 0; i < mbs; i++) {
70     jl[i] = mbs;
71     q[i]  = 0;
72   }
73 
74   /* for each row k */
75   for (k = 0; k < mbs; k++) {
76     for (i = 0; i < mbs; i++) q[i] = 0; /* to be removed! */
77     nzk  = 0;                           /* num. of nz blocks in k-th block row with diagonal block excluded */
78     q[k] = mbs;
79     /* initialize nonzero structure of k-th row to row rip[k] of A */
80     jmin = ai[rip[k]] + 1; /* exclude diag[k] */
81     jmax = ai[rip[k] + 1];
82     for (j = jmin; j < jmax; j++) {
83       vj = rip[aj[j]]; /* col. value */
84       if (vj > k) {
85         qm = k;
86         do {
87           m  = qm;
88           qm = q[m];
89         } while (qm < vj);
90         PetscCheck(qm != vj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Duplicate entry in A");
91         nzk++;
92         q[m]  = vj;
93         q[vj] = qm;
94       } /* if (vj > k) */
95     }   /* for (j=jmin; j<jmax; j++) */
96 
97     /* modify nonzero structure of k-th row by computing fill-in
98        for each row i to be merged in */
99     prow = k;
100     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
101 
102     while (prow < k) {
103       /* merge row prow into k-th row */
104       jmin = iu[prow] + 1;
105       jmax = iu[prow + 1];
106       qm   = k;
107       for (j = jmin; j < jmax; j++) {
108         vj = ju[j];
109         do {
110           m  = qm;
111           qm = q[m];
112         } while (qm < vj);
113         if (qm != vj) {
114           nzk++;
115           q[m]  = vj;
116           q[vj] = qm;
117           qm    = vj;
118         }
119       }
120       prow = jl[prow]; /* next pivot row */
121     }
122 
123     /* add k to row list for first nonzero element in k-th row */
124     if (nzk > 0) {
125       i     = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
126       jl[k] = jl[i];
127       jl[i] = k;
128     }
129     iu[k + 1] = iu[k] + nzk;
130 
131     /* allocate more space to ju if needed */
132     if (iu[k + 1] > umax) {
133       /* estimate how much additional space we will need */
134       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
135       /* just double the memory each time */
136       maxadd = umax;
137       if (maxadd < nzk) maxadd = (mbs - k) * (nzk + 1) / 2;
138       umax += maxadd;
139 
140       /* allocate a longer ju */
141       PetscCall(PetscMalloc1(umax, &jutmp));
142       PetscCall(PetscArraycpy(jutmp, ju, iu[k]));
143       PetscCall(PetscFree(ju));
144       ju = jutmp;
145       reallocs++; /* count how many times we realloc */
146     }
147 
148     /* save nonzero structure of k-th row in ju */
149     i = k;
150     while (nzk--) {
151       i           = q[i];
152       ju[juidx++] = i;
153     }
154   }
155 
156 #if defined(PETSC_USE_INFO)
157   if (ai[mbs] != 0) {
158     PetscReal af = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
159     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
160     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
161     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
162     PetscCall(PetscInfo(A, "for best performance.\n"));
163   } else {
164     PetscCall(PetscInfo(A, "Empty matrix\n"));
165   }
166 #endif
167 
168   PetscCall(ISRestoreIndices(perm, &rip));
169   PetscCall(PetscFree2(jl, q));
170 
171   /* put together the new matrix */
172   PetscCall(MatSeqSBAIJSetPreallocation(F, bs, MAT_SKIP_ALLOCATION, NULL));
173 
174   /* PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)iperm)); */
175   b               = (Mat_SeqSBAIJ *)(F)->data;
176   b->singlemalloc = PETSC_FALSE;
177   b->free_a       = PETSC_TRUE;
178   b->free_ij      = PETSC_TRUE;
179 
180   PetscCall(PetscMalloc1((iu[mbs] + 1) * bs2, &b->a));
181   b->j    = ju;
182   b->i    = iu;
183   b->diag = NULL;
184   b->ilen = NULL;
185   b->imax = NULL;
186   b->row  = perm;
187 
188   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
189 
190   PetscCall(PetscObjectReference((PetscObject)perm));
191 
192   b->icol = perm;
193   PetscCall(PetscObjectReference((PetscObject)perm));
194   PetscCall(PetscMalloc1(bs * mbs + bs, &b->solve_work));
195   /* In b structure:  Free imax, ilen, old a, old j.
196      Allocate idnew, solve_work, new a, new j */
197   PetscCall(PetscLogObjectMemory((PetscObject)F, (iu[mbs] - mbs) * (sizeof(PetscInt) + sizeof(MatScalar))));
198   b->maxnz = b->nz = iu[mbs];
199 
200   (F)->info.factor_mallocs   = reallocs;
201   (F)->info.fill_ratio_given = f;
202   if (ai[mbs] != 0) {
203     (F)->info.fill_ratio_needed = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
204   } else {
205     (F)->info.fill_ratio_needed = 0.0;
206   }
207   PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(F, perm_identity));
208   PetscFunctionReturn(0);
209 }
210 /*
211     Symbolic U^T*D*U factorization for SBAIJ format.
212     See MatICCFactorSymbolic_SeqAIJ() for description of its data structure.
213 */
214 #include <petscbt.h>
215 #include <../src/mat/utils/freespace.h>
216 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) {
217   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
218   Mat_SeqSBAIJ      *b;
219   PetscBool          perm_identity, missing;
220   PetscReal          fill = info->fill;
221   const PetscInt    *rip, *ai = a->i, *aj = a->j;
222   PetscInt           i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow;
223   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
224   PetscInt           nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
225   PetscFreeSpaceList free_space = NULL, current_space = NULL;
226   PetscBT            lnkbt;
227 
228   PetscFunctionBegin;
229   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
230   PetscCall(MatMissingDiagonal(A, &missing, &i));
231   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
232   if (bs > 1) {
233     PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact, A, perm, info));
234     PetscFunctionReturn(0);
235   }
236 
237   /* check whether perm is the identity mapping */
238   PetscCall(ISIdentity(perm, &perm_identity));
239   PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
240   a->permute = PETSC_FALSE;
241   PetscCall(ISGetIndices(perm, &rip));
242 
243   /* initialization */
244   PetscCall(PetscMalloc1(mbs + 1, &ui));
245   PetscCall(PetscMalloc1(mbs + 1, &udiag));
246   ui[0] = 0;
247 
248   /* jl: linked list for storing indices of the pivot rows
249      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
250   PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
251   for (i = 0; i < mbs; i++) {
252     jl[i] = mbs;
253     il[i] = 0;
254   }
255 
256   /* create and initialize a linked list for storing column indices of the active row k */
257   nlnk = mbs + 1;
258   PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));
259 
260   /* initial FreeSpace size is fill*(ai[mbs]+1) */
261   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
262   current_space = free_space;
263 
264   for (k = 0; k < mbs; k++) { /* for each active row k */
265     /* initialize lnk by the column indices of row rip[k] of A */
266     nzk   = 0;
267     ncols = ai[k + 1] - ai[k];
268     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row %" PetscInt_FMT " in matrix ", k);
269     for (j = 0; j < ncols; j++) {
270       i       = *(aj + ai[k] + j);
271       cols[j] = i;
272     }
273     PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
274     nzk += nlnk;
275 
276     /* update lnk by computing fill-in for each pivot row to be merged in */
277     prow = jl[k]; /* 1st pivot row */
278 
279     while (prow < k) {
280       nextprow = jl[prow];
281       /* merge prow into k-th row */
282       jmin     = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
283       jmax     = ui[prow + 1];
284       ncols    = jmax - jmin;
285       uj_ptr   = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
286       PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
287       nzk += nlnk;
288 
289       /* update il and jl for prow */
290       if (jmin < jmax) {
291         il[prow] = jmin;
292         j        = *uj_ptr;
293         jl[prow] = jl[j];
294         jl[j]    = prow;
295       }
296       prow = nextprow;
297     }
298 
299     /* if free space is not available, make more free space */
300     if (current_space->local_remaining < nzk) {
301       i = mbs - k + 1;                                   /* num of unfactored rows */
302       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
303       PetscCall(PetscFreeSpaceGet(i, &current_space));
304       reallocs++;
305     }
306 
307     /* copy data into free space, then initialize lnk */
308     PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));
309 
310     /* add the k-th row into il and jl */
311     if (nzk > 1) {
312       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
313       jl[k] = jl[i];
314       jl[i] = k;
315       il[k] = ui[k] + 1;
316     }
317     ui_ptr[k] = current_space->array;
318 
319     current_space->array += nzk;
320     current_space->local_used += nzk;
321     current_space->local_remaining -= nzk;
322 
323     ui[k + 1] = ui[k] + nzk;
324   }
325 
326   PetscCall(ISRestoreIndices(perm, &rip));
327   PetscCall(PetscFree4(ui_ptr, il, jl, cols));
328 
329   /* destroy list of free space and other temporary array(s) */
330   PetscCall(PetscMalloc1(ui[mbs] + 1, &uj));
331   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, mbs, ui, udiag)); /* store matrix factor */
332   PetscCall(PetscLLDestroy(lnk, lnkbt));
333 
334   /* put together the new matrix in MATSEQSBAIJ format */
335   PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));
336 
337   b               = (Mat_SeqSBAIJ *)fact->data;
338   b->singlemalloc = PETSC_FALSE;
339   b->free_a       = PETSC_TRUE;
340   b->free_ij      = PETSC_TRUE;
341 
342   PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a));
343 
344   b->j         = uj;
345   b->i         = ui;
346   b->diag      = udiag;
347   b->free_diag = PETSC_TRUE;
348   b->ilen      = NULL;
349   b->imax      = NULL;
350   b->row       = perm;
351   b->icol      = perm;
352 
353   PetscCall(PetscObjectReference((PetscObject)perm));
354   PetscCall(PetscObjectReference((PetscObject)perm));
355 
356   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
357 
358   PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
359   PetscCall(PetscLogObjectMemory((PetscObject)fact, ui[mbs] * (sizeof(PetscInt) + sizeof(MatScalar))));
360 
361   b->maxnz = b->nz = ui[mbs];
362 
363   fact->info.factor_mallocs   = reallocs;
364   fact->info.fill_ratio_given = fill;
365   if (ai[mbs] != 0) {
366     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
367   } else {
368     fact->info.fill_ratio_needed = 0.0;
369   }
370 #if defined(PETSC_USE_INFO)
371   if (ai[mbs] != 0) {
372     PetscReal af = fact->info.fill_ratio_needed;
373     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
374     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
375     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
376   } else {
377     PetscCall(PetscInfo(A, "Empty matrix\n"));
378   }
379 #endif
380   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
381   PetscFunctionReturn(0);
382 }
383 
384 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) {
385   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
386   Mat_SeqSBAIJ      *b;
387   PetscBool          perm_identity, missing;
388   PetscReal          fill = info->fill;
389   const PetscInt    *rip, *ai, *aj;
390   PetscInt           i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow, d;
391   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
392   PetscInt           nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr;
393   PetscFreeSpaceList free_space = NULL, current_space = NULL;
394   PetscBT            lnkbt;
395 
396   PetscFunctionBegin;
397   PetscCall(MatMissingDiagonal(A, &missing, &d));
398   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
399 
400   /*
401    This code originally uses Modified Sparse Row (MSR) storage
402    (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choise!
403    Then it is rewritten so the factor B takes seqsbaij format. However the associated
404    MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity,
405    thus the original code in MSR format is still used for these cases.
406    The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever
407    MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
408   */
409   if (bs > 1) {
410     PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact, A, perm, info));
411     PetscFunctionReturn(0);
412   }
413 
414   /* check whether perm is the identity mapping */
415   PetscCall(ISIdentity(perm, &perm_identity));
416   PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
417   a->permute = PETSC_FALSE;
418   ai         = a->i;
419   aj         = a->j;
420   PetscCall(ISGetIndices(perm, &rip));
421 
422   /* initialization */
423   PetscCall(PetscMalloc1(mbs + 1, &ui));
424   ui[0] = 0;
425 
426   /* jl: linked list for storing indices of the pivot rows
427      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
428   PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
429   for (i = 0; i < mbs; i++) {
430     jl[i] = mbs;
431     il[i] = 0;
432   }
433 
434   /* create and initialize a linked list for storing column indices of the active row k */
435   nlnk = mbs + 1;
436   PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));
437 
438   /* initial FreeSpace size is fill*(ai[mbs]+1) */
439   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
440   current_space = free_space;
441 
442   for (k = 0; k < mbs; k++) { /* for each active row k */
443     /* initialize lnk by the column indices of row rip[k] of A */
444     nzk   = 0;
445     ncols = ai[rip[k] + 1] - ai[rip[k]];
446     for (j = 0; j < ncols; j++) {
447       i       = *(aj + ai[rip[k]] + j);
448       cols[j] = rip[i];
449     }
450     PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
451     nzk += nlnk;
452 
453     /* update lnk by computing fill-in for each pivot row to be merged in */
454     prow = jl[k]; /* 1st pivot row */
455 
456     while (prow < k) {
457       nextprow = jl[prow];
458       /* merge prow into k-th row */
459       jmin     = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
460       jmax     = ui[prow + 1];
461       ncols    = jmax - jmin;
462       uj_ptr   = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
463       PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
464       nzk += nlnk;
465 
466       /* update il and jl for prow */
467       if (jmin < jmax) {
468         il[prow] = jmin;
469 
470         j        = *uj_ptr;
471         jl[prow] = jl[j];
472         jl[j]    = prow;
473       }
474       prow = nextprow;
475     }
476 
477     /* if free space is not available, make more free space */
478     if (current_space->local_remaining < nzk) {
479       i = mbs - k + 1;                                                            /* num of unfactored rows */
480       i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
481       PetscCall(PetscFreeSpaceGet(i, &current_space));
482       reallocs++;
483     }
484 
485     /* copy data into free space, then initialize lnk */
486     PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));
487 
488     /* add the k-th row into il and jl */
489     if (nzk - 1 > 0) {
490       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
491       jl[k] = jl[i];
492       jl[i] = k;
493       il[k] = ui[k] + 1;
494     }
495     ui_ptr[k] = current_space->array;
496 
497     current_space->array += nzk;
498     current_space->local_used += nzk;
499     current_space->local_remaining -= nzk;
500 
501     ui[k + 1] = ui[k] + nzk;
502   }
503 
504   PetscCall(ISRestoreIndices(perm, &rip));
505   PetscCall(PetscFree4(ui_ptr, il, jl, cols));
506 
507   /* destroy list of free space and other temporary array(s) */
508   PetscCall(PetscMalloc1(ui[mbs] + 1, &uj));
509   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
510   PetscCall(PetscLLDestroy(lnk, lnkbt));
511 
512   /* put together the new matrix in MATSEQSBAIJ format */
513   PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));
514 
515   b               = (Mat_SeqSBAIJ *)fact->data;
516   b->singlemalloc = PETSC_FALSE;
517   b->free_a       = PETSC_TRUE;
518   b->free_ij      = PETSC_TRUE;
519 
520   PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a));
521 
522   b->j    = uj;
523   b->i    = ui;
524   b->diag = NULL;
525   b->ilen = NULL;
526   b->imax = NULL;
527   b->row  = perm;
528 
529   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
530 
531   PetscCall(PetscObjectReference((PetscObject)perm));
532   b->icol = perm;
533   PetscCall(PetscObjectReference((PetscObject)perm));
534   PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
535   PetscCall(PetscLogObjectMemory((PetscObject)fact, (ui[mbs] - mbs) * (sizeof(PetscInt) + sizeof(MatScalar))));
536   b->maxnz = b->nz = ui[mbs];
537 
538   fact->info.factor_mallocs   = reallocs;
539   fact->info.fill_ratio_given = fill;
540   if (ai[mbs] != 0) {
541     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
542   } else {
543     fact->info.fill_ratio_needed = 0.0;
544   }
545 #if defined(PETSC_USE_INFO)
546   if (ai[mbs] != 0) {
547     PetscReal af = fact->info.fill_ratio_needed;
548     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
549     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
550     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
551   } else {
552     PetscCall(PetscInfo(A, "Empty matrix\n"));
553   }
554 #endif
555   PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(fact, perm_identity));
556   PetscFunctionReturn(0);
557 }
558 
559 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C, Mat A, const MatFactorInfo *info) {
560   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
561   IS              perm = b->row;
562   const PetscInt *ai, *aj, *perm_ptr, mbs = a->mbs, *bi = b->i, *bj = b->j;
563   PetscInt        i, j;
564   PetscInt       *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
565   PetscInt        bs = A->rmap->bs, bs2 = a->bs2;
566   MatScalar      *ba = b->a, *aa, *ap, *dk, *uik;
567   MatScalar      *u, *diag, *rtmp, *rtmp_ptr;
568   MatScalar      *work;
569   PetscInt       *pivots;
570   PetscBool       allowzeropivot, zeropivotdetected;
571 
572   PetscFunctionBegin;
573   /* initialization */
574   PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
575   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
576   allowzeropivot = PetscNot(A->erroriffailure);
577 
578   il[0] = 0;
579   for (i = 0; i < mbs; i++) jl[i] = mbs;
580 
581   PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
582   PetscCall(PetscMalloc1(bs, &pivots));
583 
584   PetscCall(ISGetIndices(perm, &perm_ptr));
585 
586   /* check permutation */
587   if (!a->permute) {
588     ai = a->i;
589     aj = a->j;
590     aa = a->a;
591   } else {
592     ai = a->inew;
593     aj = a->jnew;
594     PetscCall(PetscMalloc1(bs2 * ai[mbs], &aa));
595     PetscCall(PetscArraycpy(aa, a->a, bs2 * ai[mbs]));
596     PetscCall(PetscMalloc1(ai[mbs], &a2anew));
597     PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));
598 
599     for (i = 0; i < mbs; i++) {
600       jmin = ai[i];
601       jmax = ai[i + 1];
602       for (j = jmin; j < jmax; j++) {
603         while (a2anew[j] != j) {
604           k         = a2anew[j];
605           a2anew[j] = a2anew[k];
606           a2anew[k] = k;
607           for (k1 = 0; k1 < bs2; k1++) {
608             dk[k1]           = aa[k * bs2 + k1];
609             aa[k * bs2 + k1] = aa[j * bs2 + k1];
610             aa[j * bs2 + k1] = dk[k1];
611           }
612         }
613         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
614         if (i > aj[j]) {
615           ap = aa + j * bs2;                       /* ptr to the beginning of j-th block of aa */
616           for (k = 0; k < bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
617           for (k = 0; k < bs; k++) {               /* j-th block of aa <- dk^T */
618             for (k1 = 0; k1 < bs; k1++) *ap++ = dk[k + bs * k1];
619           }
620         }
621       }
622     }
623     PetscCall(PetscFree(a2anew));
624   }
625 
626   /* for each row k */
627   for (k = 0; k < mbs; k++) {
628     /*initialize k-th row with elements nonzero in row perm(k) of A */
629     jmin = ai[perm_ptr[k]];
630     jmax = ai[perm_ptr[k] + 1];
631 
632     ap = aa + jmin * bs2;
633     for (j = jmin; j < jmax; j++) {
634       vj       = perm_ptr[aj[j]]; /* block col. index */
635       rtmp_ptr = rtmp + vj * bs2;
636       for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
637     }
638 
639     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
640     PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
641     i = jl[k]; /* first row to be added to k_th row  */
642 
643     while (i < k) {
644       nexti = jl[i]; /* next row to be added to k_th row */
645 
646       /* compute multiplier */
647       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
648 
649       /* uik = -inv(Di)*U_bar(i,k) */
650       diag = ba + i * bs2;
651       u    = ba + ili * bs2;
652       PetscCall(PetscArrayzero(uik, bs2));
653       PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);
654 
655       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
656       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
657       PetscCall(PetscLogFlops(4.0 * bs * bs2));
658 
659       /* update -U(i,k) */
660       PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));
661 
662       /* add multiple of row i to k-th row ... */
663       jmin = ili + 1;
664       jmax = bi[i + 1];
665       if (jmin < jmax) {
666         for (j = jmin; j < jmax; j++) {
667           /* rtmp += -U(i,k)^T * U_bar(i,j) */
668           rtmp_ptr = rtmp + bj[j] * bs2;
669           u        = ba + j * bs2;
670           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
671         }
672         PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));
673 
674         /* ... add i to row list for next nonzero entry */
675         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
676         j     = bj[jmin];
677         jl[i] = jl[j];
678         jl[j] = i; /* update jl */
679       }
680       i = nexti;
681     }
682 
683     /* save nonzero entries in k-th row of U ... */
684 
685     /* invert diagonal block */
686     diag = ba + k * bs2;
687     PetscCall(PetscArraycpy(diag, dk, bs2));
688 
689     PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
690     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
691 
692     jmin = bi[k];
693     jmax = bi[k + 1];
694     if (jmin < jmax) {
695       for (j = jmin; j < jmax; j++) {
696         vj       = bj[j]; /* block col. index of U */
697         u        = ba + j * bs2;
698         rtmp_ptr = rtmp + vj * bs2;
699         for (k1 = 0; k1 < bs2; k1++) {
700           *u++        = *rtmp_ptr;
701           *rtmp_ptr++ = 0.0;
702         }
703       }
704 
705       /* ... add k to row list for first nonzero entry in k-th row */
706       il[k] = jmin;
707       i     = bj[jmin];
708       jl[k] = jl[i];
709       jl[i] = k;
710     }
711   }
712 
713   PetscCall(PetscFree(rtmp));
714   PetscCall(PetscFree2(il, jl));
715   PetscCall(PetscFree3(dk, uik, work));
716   PetscCall(PetscFree(pivots));
717   if (a->permute) PetscCall(PetscFree(aa));
718 
719   PetscCall(ISRestoreIndices(perm, &perm_ptr));
720 
721   C->ops->solve          = MatSolve_SeqSBAIJ_N_inplace;
722   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace;
723   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_inplace;
724   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_inplace;
725 
726   C->assembled    = PETSC_TRUE;
727   C->preallocated = PETSC_TRUE;
728 
729   PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
730   PetscFunctionReturn(0);
731 }
732 
733 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info) {
734   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
735   PetscInt      i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
736   PetscInt     *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
737   PetscInt      bs = A->rmap->bs, bs2 = a->bs2;
738   MatScalar    *ba = b->a, *aa, *ap, *dk, *uik;
739   MatScalar    *u, *diag, *rtmp, *rtmp_ptr;
740   MatScalar    *work;
741   PetscInt     *pivots;
742   PetscBool     allowzeropivot, zeropivotdetected;
743 
744   PetscFunctionBegin;
745   PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
746   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
747   il[0] = 0;
748   for (i = 0; i < mbs; i++) jl[i] = mbs;
749 
750   PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
751   PetscCall(PetscMalloc1(bs, &pivots));
752   allowzeropivot = PetscNot(A->erroriffailure);
753 
754   ai = a->i;
755   aj = a->j;
756   aa = a->a;
757 
758   /* for each row k */
759   for (k = 0; k < mbs; k++) {
760     /*initialize k-th row with elements nonzero in row k of A */
761     jmin = ai[k];
762     jmax = ai[k + 1];
763     ap   = aa + jmin * bs2;
764     for (j = jmin; j < jmax; j++) {
765       vj       = aj[j]; /* block col. index */
766       rtmp_ptr = rtmp + vj * bs2;
767       for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
768     }
769 
770     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
771     PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
772     i = jl[k]; /* first row to be added to k_th row  */
773 
774     while (i < k) {
775       nexti = jl[i]; /* next row to be added to k_th row */
776 
777       /* compute multiplier */
778       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
779 
780       /* uik = -inv(Di)*U_bar(i,k) */
781       diag = ba + i * bs2;
782       u    = ba + ili * bs2;
783       PetscCall(PetscArrayzero(uik, bs2));
784       PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);
785 
786       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
787       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
788       PetscCall(PetscLogFlops(2.0 * bs * bs2));
789 
790       /* update -U(i,k) */
791       PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));
792 
793       /* add multiple of row i to k-th row ... */
794       jmin = ili + 1;
795       jmax = bi[i + 1];
796       if (jmin < jmax) {
797         for (j = jmin; j < jmax; j++) {
798           /* rtmp += -U(i,k)^T * U_bar(i,j) */
799           rtmp_ptr = rtmp + bj[j] * bs2;
800           u        = ba + j * bs2;
801           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
802         }
803         PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));
804 
805         /* ... add i to row list for next nonzero entry */
806         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
807         j     = bj[jmin];
808         jl[i] = jl[j];
809         jl[j] = i; /* update jl */
810       }
811       i = nexti;
812     }
813 
814     /* save nonzero entries in k-th row of U ... */
815 
816     /* invert diagonal block */
817     diag = ba + k * bs2;
818     PetscCall(PetscArraycpy(diag, dk, bs2));
819 
820     PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
821     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
822 
823     jmin = bi[k];
824     jmax = bi[k + 1];
825     if (jmin < jmax) {
826       for (j = jmin; j < jmax; j++) {
827         vj       = bj[j]; /* block col. index of U */
828         u        = ba + j * bs2;
829         rtmp_ptr = rtmp + vj * bs2;
830         for (k1 = 0; k1 < bs2; k1++) {
831           *u++        = *rtmp_ptr;
832           *rtmp_ptr++ = 0.0;
833         }
834       }
835 
836       /* ... add k to row list for first nonzero entry in k-th row */
837       il[k] = jmin;
838       i     = bj[jmin];
839       jl[k] = jl[i];
840       jl[i] = k;
841     }
842   }
843 
844   PetscCall(PetscFree(rtmp));
845   PetscCall(PetscFree2(il, jl));
846   PetscCall(PetscFree3(dk, uik, work));
847   PetscCall(PetscFree(pivots));
848 
849   C->ops->solve          = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
850   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
851   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
852   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
853   C->assembled           = PETSC_TRUE;
854   C->preallocated        = PETSC_TRUE;
855 
856   PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
857   PetscFunctionReturn(0);
858 }
859 
860 /*
861     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
862     Version for blocks 2 by 2.
863 */
864 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C, Mat A, const MatFactorInfo *info) {
865   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
866   IS              perm = b->row;
867   const PetscInt *ai, *aj, *perm_ptr;
868   PetscInt        i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
869   PetscInt       *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
870   MatScalar      *ba = b->a, *aa, *ap;
871   MatScalar      *u, *diag, *rtmp, *rtmp_ptr, dk[4], uik[4];
872   PetscReal       shift = info->shiftamount;
873   PetscBool       allowzeropivot, zeropivotdetected;
874 
875   PetscFunctionBegin;
876   allowzeropivot = PetscNot(A->erroriffailure);
877 
878   /* initialization */
879   /* il and jl record the first nonzero element in each row of the accessing
880      window U(0:k, k:mbs-1).
881      jl:    list of rows to be added to uneliminated rows
882             i>= k: jl(i) is the first row to be added to row i
883             i<  k: jl(i) is the row following row i in some list of rows
884             jl(i) = mbs indicates the end of a list
885      il(i): points to the first nonzero element in columns k,...,mbs-1 of
886             row i of U */
887   PetscCall(PetscCalloc1(4 * mbs, &rtmp));
888   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
889   il[0] = 0;
890   for (i = 0; i < mbs; i++) jl[i] = mbs;
891 
892   PetscCall(ISGetIndices(perm, &perm_ptr));
893 
894   /* check permutation */
895   if (!a->permute) {
896     ai = a->i;
897     aj = a->j;
898     aa = a->a;
899   } else {
900     ai = a->inew;
901     aj = a->jnew;
902     PetscCall(PetscMalloc1(4 * ai[mbs], &aa));
903     PetscCall(PetscArraycpy(aa, a->a, 4 * ai[mbs]));
904     PetscCall(PetscMalloc1(ai[mbs], &a2anew));
905     PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));
906 
907     for (i = 0; i < mbs; i++) {
908       jmin = ai[i];
909       jmax = ai[i + 1];
910       for (j = jmin; j < jmax; j++) {
911         while (a2anew[j] != j) {
912           k         = a2anew[j];
913           a2anew[j] = a2anew[k];
914           a2anew[k] = k;
915           for (k1 = 0; k1 < 4; k1++) {
916             dk[k1]         = aa[k * 4 + k1];
917             aa[k * 4 + k1] = aa[j * 4 + k1];
918             aa[j * 4 + k1] = dk[k1];
919           }
920         }
921         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
922         if (i > aj[j]) {
923           ap    = aa + j * 4; /* ptr to the beginning of the block */
924           dk[1] = ap[1];      /* swap ap[1] and ap[2] */
925           ap[1] = ap[2];
926           ap[2] = dk[1];
927         }
928       }
929     }
930     PetscCall(PetscFree(a2anew));
931   }
932 
933   /* for each row k */
934   for (k = 0; k < mbs; k++) {
935     /*initialize k-th row with elements nonzero in row perm(k) of A */
936     jmin = ai[perm_ptr[k]];
937     jmax = ai[perm_ptr[k] + 1];
938     ap   = aa + jmin * 4;
939     for (j = jmin; j < jmax; j++) {
940       vj       = perm_ptr[aj[j]]; /* block col. index */
941       rtmp_ptr = rtmp + vj * 4;
942       for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
943     }
944 
945     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
946     PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
947     i = jl[k]; /* first row to be added to k_th row  */
948 
949     while (i < k) {
950       nexti = jl[i]; /* next row to be added to k_th row */
951 
952       /* compute multiplier */
953       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
954 
955       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
956       diag   = ba + i * 4;
957       u      = ba + ili * 4;
958       uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
959       uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
960       uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
961       uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);
962 
963       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
964       dk[0] += uik[0] * u[0] + uik[1] * u[1];
965       dk[1] += uik[2] * u[0] + uik[3] * u[1];
966       dk[2] += uik[0] * u[2] + uik[1] * u[3];
967       dk[3] += uik[2] * u[2] + uik[3] * u[3];
968 
969       PetscCall(PetscLogFlops(16.0 * 2.0));
970 
971       /* update -U(i,k): ba[ili] = uik */
972       PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));
973 
974       /* add multiple of row i to k-th row ... */
975       jmin = ili + 1;
976       jmax = bi[i + 1];
977       if (jmin < jmax) {
978         for (j = jmin; j < jmax; j++) {
979           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
980           rtmp_ptr = rtmp + bj[j] * 4;
981           u        = ba + j * 4;
982           rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
983           rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
984           rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
985           rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
986         }
987         PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));
988 
989         /* ... add i to row list for next nonzero entry */
990         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
991         j     = bj[jmin];
992         jl[i] = jl[j];
993         jl[j] = i; /* update jl */
994       }
995       i = nexti;
996     }
997 
998     /* save nonzero entries in k-th row of U ... */
999 
1000     /* invert diagonal block */
1001     diag = ba + k * 4;
1002     PetscCall(PetscArraycpy(diag, dk, 4));
1003     PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1004     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1005 
1006     jmin = bi[k];
1007     jmax = bi[k + 1];
1008     if (jmin < jmax) {
1009       for (j = jmin; j < jmax; j++) {
1010         vj       = bj[j]; /* block col. index of U */
1011         u        = ba + j * 4;
1012         rtmp_ptr = rtmp + vj * 4;
1013         for (k1 = 0; k1 < 4; k1++) {
1014           *u++        = *rtmp_ptr;
1015           *rtmp_ptr++ = 0.0;
1016         }
1017       }
1018 
1019       /* ... add k to row list for first nonzero entry in k-th row */
1020       il[k] = jmin;
1021       i     = bj[jmin];
1022       jl[k] = jl[i];
1023       jl[i] = k;
1024     }
1025   }
1026 
1027   PetscCall(PetscFree(rtmp));
1028   PetscCall(PetscFree2(il, jl));
1029   if (a->permute) PetscCall(PetscFree(aa));
1030   PetscCall(ISRestoreIndices(perm, &perm_ptr));
1031 
1032   C->ops->solve          = MatSolve_SeqSBAIJ_2_inplace;
1033   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace;
1034   C->assembled           = PETSC_TRUE;
1035   C->preallocated        = PETSC_TRUE;
1036 
1037   PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 /*
1042       Version for when blocks are 2 by 2 Using natural ordering
1043 */
1044 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info) {
1045   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1046   PetscInt      i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
1047   PetscInt     *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
1048   MatScalar    *ba = b->a, *aa, *ap, dk[8], uik[8];
1049   MatScalar    *u, *diag, *rtmp, *rtmp_ptr;
1050   PetscReal     shift = info->shiftamount;
1051   PetscBool     allowzeropivot, zeropivotdetected;
1052 
1053   PetscFunctionBegin;
1054   allowzeropivot = PetscNot(A->erroriffailure);
1055 
1056   /* initialization */
1057   /* il and jl record the first nonzero element in each row of the accessing
1058      window U(0:k, k:mbs-1).
1059      jl:    list of rows to be added to uneliminated rows
1060             i>= k: jl(i) is the first row to be added to row i
1061             i<  k: jl(i) is the row following row i in some list of rows
1062             jl(i) = mbs indicates the end of a list
1063      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1064             row i of U */
1065   PetscCall(PetscCalloc1(4 * mbs, &rtmp));
1066   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
1067   il[0] = 0;
1068   for (i = 0; i < mbs; i++) jl[i] = mbs;
1069 
1070   ai = a->i;
1071   aj = a->j;
1072   aa = a->a;
1073 
1074   /* for each row k */
1075   for (k = 0; k < mbs; k++) {
1076     /*initialize k-th row with elements nonzero in row k of A */
1077     jmin = ai[k];
1078     jmax = ai[k + 1];
1079     ap   = aa + jmin * 4;
1080     for (j = jmin; j < jmax; j++) {
1081       vj       = aj[j]; /* block col. index */
1082       rtmp_ptr = rtmp + vj * 4;
1083       for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
1084     }
1085 
1086     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1087     PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
1088     i = jl[k]; /* first row to be added to k_th row  */
1089 
1090     while (i < k) {
1091       nexti = jl[i]; /* next row to be added to k_th row */
1092 
1093       /* compute multiplier */
1094       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1095 
1096       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1097       diag   = ba + i * 4;
1098       u      = ba + ili * 4;
1099       uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
1100       uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
1101       uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
1102       uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);
1103 
1104       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1105       dk[0] += uik[0] * u[0] + uik[1] * u[1];
1106       dk[1] += uik[2] * u[0] + uik[3] * u[1];
1107       dk[2] += uik[0] * u[2] + uik[1] * u[3];
1108       dk[3] += uik[2] * u[2] + uik[3] * u[3];
1109 
1110       PetscCall(PetscLogFlops(16.0 * 2.0));
1111 
1112       /* update -U(i,k): ba[ili] = uik */
1113       PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));
1114 
1115       /* add multiple of row i to k-th row ... */
1116       jmin = ili + 1;
1117       jmax = bi[i + 1];
1118       if (jmin < jmax) {
1119         for (j = jmin; j < jmax; j++) {
1120           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1121           rtmp_ptr = rtmp + bj[j] * 4;
1122           u        = ba + j * 4;
1123           rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
1124           rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
1125           rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
1126           rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
1127         }
1128         PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));
1129 
1130         /* ... add i to row list for next nonzero entry */
1131         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1132         j     = bj[jmin];
1133         jl[i] = jl[j];
1134         jl[j] = i; /* update jl */
1135       }
1136       i = nexti;
1137     }
1138 
1139     /* save nonzero entries in k-th row of U ... */
1140 
1141     /* invert diagonal block */
1142     diag = ba + k * 4;
1143     PetscCall(PetscArraycpy(diag, dk, 4));
1144     PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1145     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1146 
1147     jmin = bi[k];
1148     jmax = bi[k + 1];
1149     if (jmin < jmax) {
1150       for (j = jmin; j < jmax; j++) {
1151         vj       = bj[j]; /* block col. index of U */
1152         u        = ba + j * 4;
1153         rtmp_ptr = rtmp + vj * 4;
1154         for (k1 = 0; k1 < 4; k1++) {
1155           *u++        = *rtmp_ptr;
1156           *rtmp_ptr++ = 0.0;
1157         }
1158       }
1159 
1160       /* ... add k to row list for first nonzero entry in k-th row */
1161       il[k] = jmin;
1162       i     = bj[jmin];
1163       jl[k] = jl[i];
1164       jl[i] = k;
1165     }
1166   }
1167 
1168   PetscCall(PetscFree(rtmp));
1169   PetscCall(PetscFree2(il, jl));
1170 
1171   C->ops->solve          = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1172   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1173   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1174   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1175   C->assembled           = PETSC_TRUE;
1176   C->preallocated        = PETSC_TRUE;
1177 
1178   PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 /*
1183     Numeric U^T*D*U factorization for SBAIJ format.
1184     Version for blocks are 1 by 1.
1185 */
1186 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info) {
1187   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1188   IS              ip = b->row;
1189   const PetscInt *ai, *aj, *rip;
1190   PetscInt       *a2anew, i, j, mbs = a->mbs, *bi = b->i, *bj = b->j, *bcol;
1191   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
1192   MatScalar      *rtmp, *ba = b->a, *bval, *aa, dk, uikdi;
1193   PetscReal       rs;
1194   FactorShiftCtx  sctx;
1195 
1196   PetscFunctionBegin;
1197   /* MatPivotSetUp(): initialize shift context sctx */
1198   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1199 
1200   PetscCall(ISGetIndices(ip, &rip));
1201   if (!a->permute) {
1202     ai = a->i;
1203     aj = a->j;
1204     aa = a->a;
1205   } else {
1206     ai = a->inew;
1207     aj = a->jnew;
1208     nz = ai[mbs];
1209     PetscCall(PetscMalloc1(nz, &aa));
1210     a2anew = a->a2anew;
1211     bval   = a->a;
1212     for (j = 0; j < nz; j++) { aa[a2anew[j]] = *(bval++); }
1213   }
1214 
1215   /* initialization */
1216   /* il and jl record the first nonzero element in each row of the accessing
1217      window U(0:k, k:mbs-1).
1218      jl:    list of rows to be added to uneliminated rows
1219             i>= k: jl(i) is the first row to be added to row i
1220             i<  k: jl(i) is the row following row i in some list of rows
1221             jl(i) = mbs indicates the end of a list
1222      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1223             row i of U */
1224   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
1225 
1226   do {
1227     sctx.newshift = PETSC_FALSE;
1228     il[0]         = 0;
1229     for (i = 0; i < mbs; i++) {
1230       rtmp[i] = 0.0;
1231       jl[i]   = mbs;
1232     }
1233 
1234     for (k = 0; k < mbs; k++) {
1235       /*initialize k-th row by the perm[k]-th row of A */
1236       jmin = ai[rip[k]];
1237       jmax = ai[rip[k] + 1];
1238       bval = ba + bi[k];
1239       for (j = jmin; j < jmax; j++) {
1240         col       = rip[aj[j]];
1241         rtmp[col] = aa[j];
1242         *bval++   = 0.0; /* for in-place factorization */
1243       }
1244 
1245       /* shift the diagonal of the matrix */
1246       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1247 
1248       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1249       dk = rtmp[k];
1250       i  = jl[k]; /* first row to be added to k_th row  */
1251 
1252       while (i < k) {
1253         nexti = jl[i]; /* next row to be added to k_th row */
1254 
1255         /* compute multiplier, update diag(k) and U(i,k) */
1256         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
1257         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
1258         dk += uikdi * ba[ili];
1259         ba[ili] = uikdi; /* -U(i,k) */
1260 
1261         /* add multiple of row i to k-th row */
1262         jmin = ili + 1;
1263         jmax = bi[i + 1];
1264         if (jmin < jmax) {
1265           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1266           PetscCall(PetscLogFlops(2.0 * (jmax - jmin)));
1267 
1268           /* update il and jl for row i */
1269           il[i] = jmin;
1270           j     = bj[jmin];
1271           jl[i] = jl[j];
1272           jl[j] = i;
1273         }
1274         i = nexti;
1275       }
1276 
1277       /* shift the diagonals when zero pivot is detected */
1278       /* compute rs=sum of abs(off-diagonal) */
1279       rs   = 0.0;
1280       jmin = bi[k] + 1;
1281       nz   = bi[k + 1] - jmin;
1282       if (nz) {
1283         bcol = bj + jmin;
1284         while (nz--) {
1285           rs += PetscAbsScalar(rtmp[*bcol]);
1286           bcol++;
1287         }
1288       }
1289 
1290       sctx.rs = rs;
1291       sctx.pv = dk;
1292       PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1293       if (sctx.newshift) break; /* sctx.shift_amount is updated */
1294       dk = sctx.pv;
1295 
1296       /* copy data into U(k,:) */
1297       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
1298       jmin      = bi[k] + 1;
1299       jmax      = bi[k + 1];
1300       if (jmin < jmax) {
1301         for (j = jmin; j < jmax; j++) {
1302           col       = bj[j];
1303           ba[j]     = rtmp[col];
1304           rtmp[col] = 0.0;
1305         }
1306         /* add the k-th row into il and jl */
1307         il[k] = jmin;
1308         i     = bj[jmin];
1309         jl[k] = jl[i];
1310         jl[i] = k;
1311       }
1312     }
1313   } while (sctx.newshift);
1314   PetscCall(PetscFree3(rtmp, il, jl));
1315   if (a->permute) PetscCall(PetscFree(aa));
1316 
1317   PetscCall(ISRestoreIndices(ip, &rip));
1318 
1319   C->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
1320   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1321   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
1322   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
1323   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
1324   C->assembled           = PETSC_TRUE;
1325   C->preallocated        = PETSC_TRUE;
1326 
1327   PetscCall(PetscLogFlops(C->rmap->N));
1328   if (sctx.nshift) {
1329     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1330       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1331     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1332       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1333     }
1334   }
1335   PetscFunctionReturn(0);
1336 }
1337 
1338 /*
1339   Version for when blocks are 1 by 1 Using natural ordering under new datastructure
1340   Modified from MatCholeskyFactorNumeric_SeqAIJ()
1341 */
1342 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info) {
1343   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ *)A->data;
1344   Mat_SeqSBAIJ  *b = (Mat_SeqSBAIJ *)B->data;
1345   PetscInt       i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1346   PetscInt      *ai = a->i, *aj = a->j, *ajtmp;
1347   PetscInt       k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1348   MatScalar     *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
1349   FactorShiftCtx sctx;
1350   PetscReal      rs;
1351   MatScalar      d, *v;
1352 
1353   PetscFunctionBegin;
1354   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
1355 
1356   /* MatPivotSetUp(): initialize shift context sctx */
1357   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1358 
1359   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1360     sctx.shift_top = info->zeropivot;
1361 
1362     PetscCall(PetscArrayzero(rtmp, mbs));
1363 
1364     for (i = 0; i < mbs; i++) {
1365       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1366       d = (aa)[a->diag[i]];
1367       rtmp[i] += -PetscRealPart(d); /* diagonal entry */
1368       ajtmp = aj + ai[i] + 1;       /* exclude diagonal */
1369       v     = aa + ai[i] + 1;
1370       nz    = ai[i + 1] - ai[i] - 1;
1371       for (j = 0; j < nz; j++) {
1372         rtmp[i] += PetscAbsScalar(v[j]);
1373         rtmp[ajtmp[j]] += PetscAbsScalar(v[j]);
1374       }
1375       if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]);
1376     }
1377     sctx.shift_top *= 1.1;
1378     sctx.nshift_max = 5;
1379     sctx.shift_lo   = 0.;
1380     sctx.shift_hi   = 1.;
1381   }
1382 
1383   /* allocate working arrays
1384      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1385      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1386   */
1387   do {
1388     sctx.newshift = PETSC_FALSE;
1389 
1390     for (i = 0; i < mbs; i++) c2r[i] = mbs;
1391     if (mbs) il[0] = 0;
1392 
1393     for (k = 0; k < mbs; k++) {
1394       /* zero rtmp */
1395       nz    = bi[k + 1] - bi[k];
1396       bjtmp = bj + bi[k];
1397       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
1398 
1399       /* load in initial unfactored row */
1400       bval = ba + bi[k];
1401       jmin = ai[k];
1402       jmax = ai[k + 1];
1403       for (j = jmin; j < jmax; j++) {
1404         col       = aj[j];
1405         rtmp[col] = aa[j];
1406         *bval++   = 0.0; /* for in-place factorization */
1407       }
1408       /* shift the diagonal of the matrix: ZeropivotApply() */
1409       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
1410 
1411       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1412       dk = rtmp[k];
1413       i  = c2r[k]; /* first row to be added to k_th row  */
1414 
1415       while (i < k) {
1416         nexti = c2r[i]; /* next row to be added to k_th row */
1417 
1418         /* compute multiplier, update diag(k) and U(i,k) */
1419         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
1420         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
1421         dk += uikdi * ba[ili];           /* update diag[k] */
1422         ba[ili] = uikdi;                 /* -U(i,k) */
1423 
1424         /* add multiple of row i to k-th row */
1425         jmin = ili + 1;
1426         jmax = bi[i + 1];
1427         if (jmin < jmax) {
1428           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1429           /* update il and c2r for row i */
1430           il[i]  = jmin;
1431           j      = bj[jmin];
1432           c2r[i] = c2r[j];
1433           c2r[j] = i;
1434         }
1435         i = nexti;
1436       }
1437 
1438       /* copy data into U(k,:) */
1439       rs   = 0.0;
1440       jmin = bi[k];
1441       jmax = bi[k + 1] - 1;
1442       if (jmin < jmax) {
1443         for (j = jmin; j < jmax; j++) {
1444           col   = bj[j];
1445           ba[j] = rtmp[col];
1446           rs += PetscAbsScalar(ba[j]);
1447         }
1448         /* add the k-th row into il and c2r */
1449         il[k]  = jmin;
1450         i      = bj[jmin];
1451         c2r[k] = c2r[i];
1452         c2r[i] = k;
1453       }
1454 
1455       sctx.rs = rs;
1456       sctx.pv = dk;
1457       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
1458       if (sctx.newshift) break;
1459       dk = sctx.pv;
1460 
1461       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
1462     }
1463   } while (sctx.newshift);
1464 
1465   PetscCall(PetscFree3(rtmp, il, c2r));
1466 
1467   B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1468   B->ops->solves         = MatSolves_SeqSBAIJ_1;
1469   B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1470   B->ops->matsolve       = MatMatSolve_SeqSBAIJ_1_NaturalOrdering;
1471   B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1472   B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1473 
1474   B->assembled    = PETSC_TRUE;
1475   B->preallocated = PETSC_TRUE;
1476 
1477   PetscCall(PetscLogFlops(B->rmap->n));
1478 
1479   /* MatPivotView() */
1480   if (sctx.nshift) {
1481     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1482       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
1483     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1484       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1485     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1486       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
1487     }
1488   }
1489   PetscFunctionReturn(0);
1490 }
1491 
1492 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info) {
1493   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1494   PetscInt       i, j, mbs = a->mbs;
1495   PetscInt      *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
1496   PetscInt       k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz;
1497   MatScalar     *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval;
1498   PetscReal      rs;
1499   FactorShiftCtx sctx;
1500 
1501   PetscFunctionBegin;
1502   /* MatPivotSetUp(): initialize shift context sctx */
1503   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1504 
1505   /* initialization */
1506   /* il and jl record the first nonzero element in each row of the accessing
1507      window U(0:k, k:mbs-1).
1508      jl:    list of rows to be added to uneliminated rows
1509             i>= k: jl(i) is the first row to be added to row i
1510             i<  k: jl(i) is the row following row i in some list of rows
1511             jl(i) = mbs indicates the end of a list
1512      il(i): points to the first nonzero element in U(i,k:mbs-1)
1513   */
1514   PetscCall(PetscMalloc1(mbs, &rtmp));
1515   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
1516 
1517   do {
1518     sctx.newshift = PETSC_FALSE;
1519     il[0]         = 0;
1520     for (i = 0; i < mbs; i++) {
1521       rtmp[i] = 0.0;
1522       jl[i]   = mbs;
1523     }
1524 
1525     for (k = 0; k < mbs; k++) {
1526       /*initialize k-th row with elements nonzero in row perm(k) of A */
1527       nz   = ai[k + 1] - ai[k];
1528       acol = aj + ai[k];
1529       aval = aa + ai[k];
1530       bval = ba + bi[k];
1531       while (nz--) {
1532         rtmp[*acol++] = *aval++;
1533         *bval++       = 0.0; /* for in-place factorization */
1534       }
1535 
1536       /* shift the diagonal of the matrix */
1537       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1538 
1539       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1540       dk = rtmp[k];
1541       i  = jl[k]; /* first row to be added to k_th row  */
1542 
1543       while (i < k) {
1544         nexti = jl[i]; /* next row to be added to k_th row */
1545         /* compute multiplier, update D(k) and U(i,k) */
1546         ili   = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1547         uikdi = -ba[ili] * ba[bi[i]];
1548         dk += uikdi * ba[ili];
1549         ba[ili] = uikdi; /* -U(i,k) */
1550 
1551         /* add multiple of row i to k-th row ... */
1552         jmin = ili + 1;
1553         nz   = bi[i + 1] - jmin;
1554         if (nz > 0) {
1555           bcol = bj + jmin;
1556           bval = ba + jmin;
1557           PetscCall(PetscLogFlops(2.0 * nz));
1558           while (nz--) rtmp[*bcol++] += uikdi * (*bval++);
1559 
1560           /* update il and jl for i-th row */
1561           il[i] = jmin;
1562           j     = bj[jmin];
1563           jl[i] = jl[j];
1564           jl[j] = i;
1565         }
1566         i = nexti;
1567       }
1568 
1569       /* shift the diagonals when zero pivot is detected */
1570       /* compute rs=sum of abs(off-diagonal) */
1571       rs   = 0.0;
1572       jmin = bi[k] + 1;
1573       nz   = bi[k + 1] - jmin;
1574       if (nz) {
1575         bcol = bj + jmin;
1576         while (nz--) {
1577           rs += PetscAbsScalar(rtmp[*bcol]);
1578           bcol++;
1579         }
1580       }
1581 
1582       sctx.rs = rs;
1583       sctx.pv = dk;
1584       PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1585       if (sctx.newshift) break; /* sctx.shift_amount is updated */
1586       dk = sctx.pv;
1587 
1588       /* copy data into U(k,:) */
1589       ba[bi[k]] = 1.0 / dk;
1590       jmin      = bi[k] + 1;
1591       nz        = bi[k + 1] - jmin;
1592       if (nz) {
1593         bcol = bj + jmin;
1594         bval = ba + jmin;
1595         while (nz--) {
1596           *bval++       = rtmp[*bcol];
1597           rtmp[*bcol++] = 0.0;
1598         }
1599         /* add k-th row into il and jl */
1600         il[k] = jmin;
1601         i     = bj[jmin];
1602         jl[k] = jl[i];
1603         jl[i] = k;
1604       }
1605     } /* end of for (k = 0; k<mbs; k++) */
1606   } while (sctx.newshift);
1607   PetscCall(PetscFree(rtmp));
1608   PetscCall(PetscFree2(il, jl));
1609 
1610   C->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1611   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1612   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1613   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1614   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1615 
1616   C->assembled    = PETSC_TRUE;
1617   C->preallocated = PETSC_TRUE;
1618 
1619   PetscCall(PetscLogFlops(C->rmap->N));
1620   if (sctx.nshift) {
1621     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1622       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1623     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1624       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1625     }
1626   }
1627   PetscFunctionReturn(0);
1628 }
1629 
1630 PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A, IS perm, const MatFactorInfo *info) {
1631   Mat C;
1632 
1633   PetscFunctionBegin;
1634   PetscCall(MatGetFactor(A, "petsc", MAT_FACTOR_CHOLESKY, &C));
1635   PetscCall(MatCholeskyFactorSymbolic(C, A, perm, info));
1636   PetscCall(MatCholeskyFactorNumeric(C, A, info));
1637 
1638   A->ops->solve          = C->ops->solve;
1639   A->ops->solvetranspose = C->ops->solvetranspose;
1640 
1641   PetscCall(MatHeaderMerge(A, &C));
1642   PetscFunctionReturn(0);
1643 }
1644