xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact.c (revision ebead697dbf761eb322f829370bbe90b3bd93fa3)
1 
2 #include <../src/mat/impls/baij/seq/baij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petsc/private/kernels/blockinvert.h>
5 #include <petscis.h>
6 
7 PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
8 {
9   Mat_SeqSBAIJ *fact=(Mat_SeqSBAIJ*)F->data;
10   MatScalar    *dd=fact->a;
11   PetscInt     mbs=fact->mbs,bs=F->rmap->bs,i,nneg_tmp,npos_tmp,*fi=fact->diag;
12 
13   PetscFunctionBegin;
14   PetscCheck(bs == 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for bs: %" PetscInt_FMT " >1 yet",bs);
15 
16   nneg_tmp = 0; npos_tmp = 0;
17   for (i=0; i<mbs; i++) {
18     if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++;
19     else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++;
20     fi++;
21   }
22   if (nneg)  *nneg  = nneg_tmp;
23   if (npos)  *npos  = npos_tmp;
24   if (nzero) *nzero = mbs - nneg_tmp - npos_tmp;
25   PetscFunctionReturn(0);
26 }
27 
28 /*
29   Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
30   Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad.
31 */
32 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F,Mat A,IS perm,const MatFactorInfo *info)
33 {
34   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b;
35   const PetscInt *rip,*ai,*aj;
36   PetscInt       i,mbs = a->mbs,*jutmp,bs = A->rmap->bs,bs2=a->bs2;
37   PetscInt       m,reallocs = 0,prow;
38   PetscInt       *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
39   PetscReal      f = info->fill;
40   PetscBool      perm_identity;
41 
42   PetscFunctionBegin;
43   /* check whether perm is the identity mapping */
44   PetscCall(ISIdentity(perm,&perm_identity));
45   PetscCall(ISGetIndices(perm,&rip));
46 
47   if (perm_identity) { /* without permutation */
48     a->permute = PETSC_FALSE;
49 
50     ai = a->i; aj = a->j;
51   } else {            /* non-trivial permutation */
52     a->permute = PETSC_TRUE;
53 
54     PetscCall(MatReorderingSeqSBAIJ(A,perm));
55 
56     ai = a->inew; aj = a->jnew;
57   }
58 
59   /* initialization */
60   PetscCall(PetscMalloc1(mbs+1,&iu));
61   umax  = (PetscInt)(f*ai[mbs] + 1); umax += mbs + 1;
62   PetscCall(PetscMalloc1(umax,&ju));
63   iu[0] = mbs+1;
64   juidx = mbs + 1; /* index for ju */
65   /* jl linked list for pivot row -- linked list for col index */
66   PetscCall(PetscMalloc2(mbs,&jl,mbs,&q));
67   for (i=0; i<mbs; i++) {
68     jl[i] = mbs;
69     q[i]  = 0;
70   }
71 
72   /* for each row k */
73   for (k=0; k<mbs; k++) {
74     for (i=0; i<mbs; i++) q[i] = 0;  /* to be removed! */
75     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
76     q[k] = mbs;
77     /* initialize nonzero structure of k-th row to row rip[k] of A */
78     jmin = ai[rip[k]] +1; /* exclude diag[k] */
79     jmax = ai[rip[k]+1];
80     for (j=jmin; j<jmax; j++) {
81       vj = rip[aj[j]]; /* col. value */
82       if (vj > k) {
83         qm = k;
84         do {
85           m = qm; qm = q[m];
86         } while (qm < vj);
87         PetscCheck(qm != vj,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Duplicate entry in A");
88         nzk++;
89         q[m]  = vj;
90         q[vj] = qm;
91       } /* if (vj > k) */
92     } /* for (j=jmin; j<jmax; j++) */
93 
94     /* modify nonzero structure of k-th row by computing fill-in
95        for each row i to be merged in */
96     prow = k;
97     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
98 
99     while (prow < k) {
100       /* merge row prow into k-th row */
101       jmin = iu[prow] + 1; jmax = iu[prow+1];
102       qm   = k;
103       for (j=jmin; j<jmax; j++) {
104         vj = ju[j];
105         do {
106           m = qm; qm = q[m];
107         } while (qm < vj);
108         if (qm != vj) {
109           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
110         }
111       }
112       prow = jl[prow]; /* next pivot row */
113     }
114 
115     /* add k to row list for first nonzero element in k-th row */
116     if (nzk > 0) {
117       i     = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
118       jl[k] = jl[i]; jl[i] = k;
119     }
120     iu[k+1] = iu[k] + nzk;
121 
122     /* allocate more space to ju if needed */
123     if (iu[k+1] > umax) {
124       /* estimate how much additional space we will need */
125       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
126       /* just double the memory each time */
127       maxadd = umax;
128       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
129       umax += maxadd;
130 
131       /* allocate a longer ju */
132       PetscCall(PetscMalloc1(umax,&jutmp));
133       PetscCall(PetscArraycpy(jutmp,ju,iu[k]));
134       PetscCall(PetscFree(ju));
135       ju   = jutmp;
136       reallocs++; /* count how many times we realloc */
137     }
138 
139     /* save nonzero structure of k-th row in ju */
140     i=k;
141     while (nzk--) {
142       i           = q[i];
143       ju[juidx++] = i;
144     }
145   }
146 
147 #if defined(PETSC_USE_INFO)
148   if (ai[mbs] != 0) {
149     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
150     PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af));
151     PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af));
152     PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g);\n",(double)af));
153     PetscCall(PetscInfo(A,"for best performance.\n"));
154   } else {
155     PetscCall(PetscInfo(A,"Empty matrix\n"));
156   }
157 #endif
158 
159   PetscCall(ISRestoreIndices(perm,&rip));
160   PetscCall(PetscFree2(jl,q));
161 
162   /* put together the new matrix */
163   PetscCall(MatSeqSBAIJSetPreallocation(F,bs,MAT_SKIP_ALLOCATION,NULL));
164 
165   /* PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)iperm)); */
166   b                = (Mat_SeqSBAIJ*)(F)->data;
167   b->singlemalloc  = PETSC_FALSE;
168   b->free_a        = PETSC_TRUE;
169   b->free_ij       = PETSC_TRUE;
170 
171   PetscCall(PetscMalloc1((iu[mbs]+1)*bs2,&b->a));
172   b->j    = ju;
173   b->i    = iu;
174   b->diag = NULL;
175   b->ilen = NULL;
176   b->imax = NULL;
177   b->row  = perm;
178 
179   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
180 
181   PetscCall(PetscObjectReference((PetscObject)perm));
182 
183   b->icol = perm;
184   PetscCall(PetscObjectReference((PetscObject)perm));
185   PetscCall(PetscMalloc1(bs*mbs+bs,&b->solve_work));
186   /* In b structure:  Free imax, ilen, old a, old j.
187      Allocate idnew, solve_work, new a, new j */
188   PetscCall(PetscLogObjectMemory((PetscObject)F,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))));
189   b->maxnz = b->nz = iu[mbs];
190 
191   (F)->info.factor_mallocs   = reallocs;
192   (F)->info.fill_ratio_given = f;
193   if (ai[mbs] != 0) {
194     (F)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
195   } else {
196     (F)->info.fill_ratio_needed = 0.0;
197   }
198   PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(F,perm_identity));
199   PetscFunctionReturn(0);
200 }
201 /*
202     Symbolic U^T*D*U factorization for SBAIJ format.
203     See MatICCFactorSymbolic_SeqAIJ() for description of its data structure.
204 */
205 #include <petscbt.h>
206 #include <../src/mat/utils/freespace.h>
207 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
208 {
209   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data;
210   Mat_SeqSBAIJ       *b;
211   PetscBool          perm_identity,missing;
212   PetscReal          fill = info->fill;
213   const PetscInt     *rip,*ai=a->i,*aj=a->j;
214   PetscInt           i,mbs=a->mbs,bs=A->rmap->bs,reallocs=0,prow;
215   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
216   PetscInt           nlnk,*lnk,ncols,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
217   PetscFreeSpaceList free_space=NULL,current_space=NULL;
218   PetscBT            lnkbt;
219 
220   PetscFunctionBegin;
221   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n);
222   PetscCall(MatMissingDiagonal(A,&missing,&i));
223   PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i);
224   if (bs > 1) {
225     PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact,A,perm,info));
226     PetscFunctionReturn(0);
227   }
228 
229   /* check whether perm is the identity mapping */
230   PetscCall(ISIdentity(perm,&perm_identity));
231   PetscCheck(perm_identity,PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format");
232   a->permute = PETSC_FALSE;
233   PetscCall(ISGetIndices(perm,&rip));
234 
235   /* initialization */
236   PetscCall(PetscMalloc1(mbs+1,&ui));
237   PetscCall(PetscMalloc1(mbs+1,&udiag));
238   ui[0] = 0;
239 
240   /* jl: linked list for storing indices of the pivot rows
241      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
242   PetscCall(PetscMalloc4(mbs,&ui_ptr,mbs,&il,mbs,&jl,mbs,&cols));
243   for (i=0; i<mbs; i++) {
244     jl[i] = mbs; il[i] = 0;
245   }
246 
247   /* create and initialize a linked list for storing column indices of the active row k */
248   nlnk = mbs + 1;
249   PetscCall(PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt));
250 
251   /* initial FreeSpace size is fill*(ai[mbs]+1) */
252   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[mbs]+1),&free_space));
253   current_space = free_space;
254 
255   for (k=0; k<mbs; k++) {  /* for each active row k */
256     /* initialize lnk by the column indices of row rip[k] of A */
257     nzk   = 0;
258     ncols = ai[k+1] - ai[k];
259     PetscCheck(ncols,PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row %" PetscInt_FMT " in matrix ",k);
260     for (j=0; j<ncols; j++) {
261       i       = *(aj + ai[k] + j);
262       cols[j] = i;
263     }
264     PetscCall(PetscLLAdd(ncols,cols,mbs,&nlnk,lnk,lnkbt));
265     nzk += nlnk;
266 
267     /* update lnk by computing fill-in for each pivot row to be merged in */
268     prow = jl[k]; /* 1st pivot row */
269 
270     while (prow < k) {
271       nextprow = jl[prow];
272       /* merge prow into k-th row */
273       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
274       jmax   = ui[prow+1];
275       ncols  = jmax-jmin;
276       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
277       PetscCall(PetscLLAddSorted(ncols,uj_ptr,mbs,&nlnk,lnk,lnkbt));
278       nzk   += nlnk;
279 
280       /* update il and jl for prow */
281       if (jmin < jmax) {
282         il[prow] = jmin;
283         j        = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
284       }
285       prow = nextprow;
286     }
287 
288     /* if free space is not available, make more free space */
289     if (current_space->local_remaining<nzk) {
290       i    = mbs - k + 1; /* num of unfactored rows */
291       i    = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
292       PetscCall(PetscFreeSpaceGet(i,&current_space));
293       reallocs++;
294     }
295 
296     /* copy data into free space, then initialize lnk */
297     PetscCall(PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt));
298 
299     /* add the k-th row into il and jl */
300     if (nzk > 1) {
301       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
302       jl[k] = jl[i]; jl[i] = k;
303       il[k] = ui[k] + 1;
304     }
305     ui_ptr[k] = current_space->array;
306 
307     current_space->array           += nzk;
308     current_space->local_used      += nzk;
309     current_space->local_remaining -= nzk;
310 
311     ui[k+1] = ui[k] + nzk;
312   }
313 
314   PetscCall(ISRestoreIndices(perm,&rip));
315   PetscCall(PetscFree4(ui_ptr,il,jl,cols));
316 
317   /* destroy list of free space and other temporary array(s) */
318   PetscCall(PetscMalloc1(ui[mbs]+1,&uj));
319   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space,uj,mbs,ui,udiag)); /* store matrix factor */
320   PetscCall(PetscLLDestroy(lnk,lnkbt));
321 
322   /* put together the new matrix in MATSEQSBAIJ format */
323   PetscCall(MatSeqSBAIJSetPreallocation(fact,bs,MAT_SKIP_ALLOCATION,NULL));
324 
325   b               = (Mat_SeqSBAIJ*)fact->data;
326   b->singlemalloc = PETSC_FALSE;
327   b->free_a       = PETSC_TRUE;
328   b->free_ij      = PETSC_TRUE;
329 
330   PetscCall(PetscMalloc1(ui[mbs]+1,&b->a));
331 
332   b->j         = uj;
333   b->i         = ui;
334   b->diag      = udiag;
335   b->free_diag = PETSC_TRUE;
336   b->ilen      = NULL;
337   b->imax      = NULL;
338   b->row       = perm;
339   b->icol      = perm;
340 
341   PetscCall(PetscObjectReference((PetscObject)perm));
342   PetscCall(PetscObjectReference((PetscObject)perm));
343 
344   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
345 
346   PetscCall(PetscMalloc1(mbs+1,&b->solve_work));
347   PetscCall(PetscLogObjectMemory((PetscObject)fact,ui[mbs]*(sizeof(PetscInt)+sizeof(MatScalar))));
348 
349   b->maxnz = b->nz = ui[mbs];
350 
351   fact->info.factor_mallocs   = reallocs;
352   fact->info.fill_ratio_given = fill;
353   if (ai[mbs] != 0) {
354     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs])/ai[mbs];
355   } else {
356     fact->info.fill_ratio_needed = 0.0;
357   }
358 #if defined(PETSC_USE_INFO)
359   if (ai[mbs] != 0) {
360     PetscReal af = fact->info.fill_ratio_needed;
361     PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af));
362     PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af));
363     PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af));
364   } else {
365     PetscCall(PetscInfo(A,"Empty matrix\n"));
366   }
367 #endif
368   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
369   PetscFunctionReturn(0);
370 }
371 
372 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
373 {
374   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data;
375   Mat_SeqSBAIJ       *b;
376   PetscBool          perm_identity,missing;
377   PetscReal          fill = info->fill;
378   const PetscInt     *rip,*ai,*aj;
379   PetscInt           i,mbs=a->mbs,bs=A->rmap->bs,reallocs=0,prow,d;
380   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
381   PetscInt           nlnk,*lnk,ncols,*cols,*uj,**ui_ptr,*uj_ptr;
382   PetscFreeSpaceList free_space=NULL,current_space=NULL;
383   PetscBT            lnkbt;
384 
385   PetscFunctionBegin;
386   PetscCall(MatMissingDiagonal(A,&missing,&d));
387   PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,d);
388 
389   /*
390    This code originally uses Modified Sparse Row (MSR) storage
391    (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choise!
392    Then it is rewritten so the factor B takes seqsbaij format. However the associated
393    MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity,
394    thus the original code in MSR format is still used for these cases.
395    The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever
396    MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
397   */
398   if (bs > 1) {
399     PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact,A,perm,info));
400     PetscFunctionReturn(0);
401   }
402 
403   /* check whether perm is the identity mapping */
404   PetscCall(ISIdentity(perm,&perm_identity));
405   PetscCheck(perm_identity,PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format");
406   a->permute = PETSC_FALSE;
407   ai = a->i; aj = a->j;
408   PetscCall(ISGetIndices(perm,&rip));
409 
410   /* initialization */
411   PetscCall(PetscMalloc1(mbs+1,&ui));
412   ui[0] = 0;
413 
414   /* jl: linked list for storing indices of the pivot rows
415      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
416   PetscCall(PetscMalloc4(mbs,&ui_ptr,mbs,&il,mbs,&jl,mbs,&cols));
417   for (i=0; i<mbs; i++) {
418     jl[i] = mbs; il[i] = 0;
419   }
420 
421   /* create and initialize a linked list for storing column indices of the active row k */
422   nlnk = mbs + 1;
423   PetscCall(PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt));
424 
425   /* initial FreeSpace size is fill*(ai[mbs]+1) */
426   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[mbs]+1),&free_space));
427   current_space = free_space;
428 
429   for (k=0; k<mbs; k++) {  /* for each active row k */
430     /* initialize lnk by the column indices of row rip[k] of A */
431     nzk   = 0;
432     ncols = ai[rip[k]+1] - ai[rip[k]];
433     for (j=0; j<ncols; j++) {
434       i       = *(aj + ai[rip[k]] + j);
435       cols[j] = rip[i];
436     }
437     PetscCall(PetscLLAdd(ncols,cols,mbs,&nlnk,lnk,lnkbt));
438     nzk += nlnk;
439 
440     /* update lnk by computing fill-in for each pivot row to be merged in */
441     prow = jl[k]; /* 1st pivot row */
442 
443     while (prow < k) {
444       nextprow = jl[prow];
445       /* merge prow into k-th row */
446       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
447       jmax   = ui[prow+1];
448       ncols  = jmax-jmin;
449       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
450       PetscCall(PetscLLAddSorted(ncols,uj_ptr,mbs,&nlnk,lnk,lnkbt));
451       nzk   += nlnk;
452 
453       /* update il and jl for prow */
454       if (jmin < jmax) {
455         il[prow] = jmin;
456 
457         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
458       }
459       prow = nextprow;
460     }
461 
462     /* if free space is not available, make more free space */
463     if (current_space->local_remaining<nzk) {
464       i = mbs - k + 1;          /* num of unfactored rows */
465       i = PetscMin(PetscIntMultTruncate(i,nzk), PetscIntMultTruncate(i,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
466       PetscCall(PetscFreeSpaceGet(i,&current_space));
467       reallocs++;
468     }
469 
470     /* copy data into free space, then initialize lnk */
471     PetscCall(PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt));
472 
473     /* add the k-th row into il and jl */
474     if (nzk-1 > 0) {
475       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
476       jl[k] = jl[i]; jl[i] = k;
477       il[k] = ui[k] + 1;
478     }
479     ui_ptr[k] = current_space->array;
480 
481     current_space->array           += nzk;
482     current_space->local_used      += nzk;
483     current_space->local_remaining -= nzk;
484 
485     ui[k+1] = ui[k] + nzk;
486   }
487 
488   PetscCall(ISRestoreIndices(perm,&rip));
489   PetscCall(PetscFree4(ui_ptr,il,jl,cols));
490 
491   /* destroy list of free space and other temporary array(s) */
492   PetscCall(PetscMalloc1(ui[mbs]+1,&uj));
493   PetscCall(PetscFreeSpaceContiguous(&free_space,uj));
494   PetscCall(PetscLLDestroy(lnk,lnkbt));
495 
496   /* put together the new matrix in MATSEQSBAIJ format */
497   PetscCall(MatSeqSBAIJSetPreallocation(fact,bs,MAT_SKIP_ALLOCATION,NULL));
498 
499   b               = (Mat_SeqSBAIJ*)fact->data;
500   b->singlemalloc = PETSC_FALSE;
501   b->free_a       = PETSC_TRUE;
502   b->free_ij      = PETSC_TRUE;
503 
504   PetscCall(PetscMalloc1(ui[mbs]+1,&b->a));
505 
506   b->j    = uj;
507   b->i    = ui;
508   b->diag = NULL;
509   b->ilen = NULL;
510   b->imax = NULL;
511   b->row  = perm;
512 
513   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
514 
515   PetscCall(PetscObjectReference((PetscObject)perm));
516   b->icol  = perm;
517   PetscCall(PetscObjectReference((PetscObject)perm));
518   PetscCall(PetscMalloc1(mbs+1,&b->solve_work));
519   PetscCall(PetscLogObjectMemory((PetscObject)fact,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))));
520   b->maxnz = b->nz = ui[mbs];
521 
522   fact->info.factor_mallocs   = reallocs;
523   fact->info.fill_ratio_given = fill;
524   if (ai[mbs] != 0) {
525     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs])/ai[mbs];
526   } else {
527     fact->info.fill_ratio_needed = 0.0;
528   }
529 #if defined(PETSC_USE_INFO)
530   if (ai[mbs] != 0) {
531     PetscReal af = fact->info.fill_ratio_needed;
532     PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af));
533     PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af));
534     PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af));
535   } else {
536     PetscCall(PetscInfo(A,"Empty matrix\n"));
537   }
538 #endif
539   PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(fact,perm_identity));
540   PetscFunctionReturn(0);
541 }
542 
543 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C,Mat A,const MatFactorInfo *info)
544 {
545   Mat_SeqSBAIJ   *a   = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)C->data;
546   IS             perm = b->row;
547   const PetscInt *ai,*aj,*perm_ptr,mbs=a->mbs,*bi=b->i,*bj=b->j;
548   PetscInt       i,j;
549   PetscInt       *a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
550   PetscInt       bs  =A->rmap->bs,bs2 = a->bs2;
551   MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
552   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
553   MatScalar      *work;
554   PetscInt       *pivots;
555   PetscBool      allowzeropivot,zeropivotdetected;
556 
557   PetscFunctionBegin;
558   /* initialization */
559   PetscCall(PetscCalloc1(bs2*mbs,&rtmp));
560   PetscCall(PetscMalloc2(mbs,&il,mbs,&jl));
561   allowzeropivot = PetscNot(A->erroriffailure);
562 
563   il[0] = 0;
564   for (i=0; i<mbs; i++) jl[i] = mbs;
565 
566   PetscCall(PetscMalloc3(bs2,&dk,bs2,&uik,bs,&work));
567   PetscCall(PetscMalloc1(bs,&pivots));
568 
569   PetscCall(ISGetIndices(perm,&perm_ptr));
570 
571   /* check permutation */
572   if (!a->permute) {
573     ai = a->i; aj = a->j; aa = a->a;
574   } else {
575     ai   = a->inew; aj = a->jnew;
576     PetscCall(PetscMalloc1(bs2*ai[mbs],&aa));
577     PetscCall(PetscArraycpy(aa,a->a,bs2*ai[mbs]));
578     PetscCall(PetscMalloc1(ai[mbs],&a2anew));
579     PetscCall(PetscArraycpy(a2anew,a->a2anew,ai[mbs]));
580 
581     for (i=0; i<mbs; i++) {
582       jmin = ai[i]; jmax = ai[i+1];
583       for (j=jmin; j<jmax; j++) {
584         while (a2anew[j] != j) {
585           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
586           for (k1=0; k1<bs2; k1++) {
587             dk[k1]       = aa[k*bs2+k1];
588             aa[k*bs2+k1] = aa[j*bs2+k1];
589             aa[j*bs2+k1] = dk[k1];
590           }
591         }
592         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
593         if (i > aj[j]) {
594           ap = aa + j*bs2;                     /* ptr to the beginning of j-th block of aa */
595           for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
596           for (k=0; k<bs; k++) {               /* j-th block of aa <- dk^T */
597             for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1];
598           }
599         }
600       }
601     }
602     PetscCall(PetscFree(a2anew));
603   }
604 
605   /* for each row k */
606   for (k = 0; k<mbs; k++) {
607 
608     /*initialize k-th row with elements nonzero in row perm(k) of A */
609     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
610 
611     ap = aa + jmin*bs2;
612     for (j = jmin; j < jmax; j++) {
613       vj       = perm_ptr[aj[j]];   /* block col. index */
614       rtmp_ptr = rtmp + vj*bs2;
615       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
616     }
617 
618     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
619     PetscCall(PetscArraycpy(dk,rtmp+k*bs2,bs2));
620     i    = jl[k]; /* first row to be added to k_th row  */
621 
622     while (i < k) {
623       nexti = jl[i]; /* next row to be added to k_th row */
624 
625       /* compute multiplier */
626       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
627 
628       /* uik = -inv(Di)*U_bar(i,k) */
629       diag = ba + i*bs2;
630       u    = ba + ili*bs2;
631       PetscCall(PetscArrayzero(uik,bs2));
632       PetscKernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
633 
634       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
635       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
636       PetscCall(PetscLogFlops(4.0*bs*bs2));
637 
638       /* update -U(i,k) */
639       PetscCall(PetscArraycpy(ba+ili*bs2,uik,bs2));
640 
641       /* add multiple of row i to k-th row ... */
642       jmin = ili + 1; jmax = bi[i+1];
643       if (jmin < jmax) {
644         for (j=jmin; j<jmax; j++) {
645           /* rtmp += -U(i,k)^T * U_bar(i,j) */
646           rtmp_ptr = rtmp + bj[j]*bs2;
647           u        = ba + j*bs2;
648           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
649         }
650         PetscCall(PetscLogFlops(2.0*bs*bs2*(jmax-jmin)));
651 
652         /* ... add i to row list for next nonzero entry */
653         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
654         j     = bj[jmin];
655         jl[i] = jl[j]; jl[j] = i; /* update jl */
656       }
657       i = nexti;
658     }
659 
660     /* save nonzero entries in k-th row of U ... */
661 
662     /* invert diagonal block */
663     diag = ba+k*bs2;
664     PetscCall(PetscArraycpy(diag,dk,bs2));
665 
666     PetscCall(PetscKernel_A_gets_inverse_A(bs,diag,pivots,work,allowzeropivot,&zeropivotdetected));
667     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
668 
669     jmin = bi[k]; jmax = bi[k+1];
670     if (jmin < jmax) {
671       for (j=jmin; j<jmax; j++) {
672         vj       = bj[j];      /* block col. index of U */
673         u        = ba + j*bs2;
674         rtmp_ptr = rtmp + vj*bs2;
675         for (k1=0; k1<bs2; k1++) {
676           *u++        = *rtmp_ptr;
677           *rtmp_ptr++ = 0.0;
678         }
679       }
680 
681       /* ... add k to row list for first nonzero entry in k-th row */
682       il[k] = jmin;
683       i     = bj[jmin];
684       jl[k] = jl[i]; jl[i] = k;
685     }
686   }
687 
688   PetscCall(PetscFree(rtmp));
689   PetscCall(PetscFree2(il,jl));
690   PetscCall(PetscFree3(dk,uik,work));
691   PetscCall(PetscFree(pivots));
692   if (a->permute) PetscCall(PetscFree(aa));
693 
694   PetscCall(ISRestoreIndices(perm,&perm_ptr));
695 
696   C->ops->solve          = MatSolve_SeqSBAIJ_N_inplace;
697   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace;
698   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_inplace;
699   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_inplace;
700 
701   C->assembled    = PETSC_TRUE;
702   C->preallocated = PETSC_TRUE;
703 
704   PetscCall(PetscLogFlops(1.3333*bs*bs2*b->mbs)); /* from inverting diagonal blocks */
705   PetscFunctionReturn(0);
706 }
707 
708 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info)
709 {
710   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)C->data;
711   PetscInt       i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
712   PetscInt       *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
713   PetscInt       bs  =A->rmap->bs,bs2 = a->bs2;
714   MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
715   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
716   MatScalar      *work;
717   PetscInt       *pivots;
718   PetscBool      allowzeropivot,zeropivotdetected;
719 
720   PetscFunctionBegin;
721   PetscCall(PetscCalloc1(bs2*mbs,&rtmp));
722   PetscCall(PetscMalloc2(mbs,&il,mbs,&jl));
723   il[0] = 0;
724   for (i=0; i<mbs; i++) jl[i] = mbs;
725 
726   PetscCall(PetscMalloc3(bs2,&dk,bs2,&uik,bs,&work));
727   PetscCall(PetscMalloc1(bs,&pivots));
728   allowzeropivot = PetscNot(A->erroriffailure);
729 
730   ai = a->i; aj = a->j; aa = a->a;
731 
732   /* for each row k */
733   for (k = 0; k<mbs; k++) {
734 
735     /*initialize k-th row with elements nonzero in row k of A */
736     jmin = ai[k]; jmax = ai[k+1];
737     ap   = aa + jmin*bs2;
738     for (j = jmin; j < jmax; j++) {
739       vj       = aj[j];   /* block col. index */
740       rtmp_ptr = rtmp + vj*bs2;
741       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
742     }
743 
744     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
745     PetscCall(PetscArraycpy(dk,rtmp+k*bs2,bs2));
746     i    = jl[k]; /* first row to be added to k_th row  */
747 
748     while (i < k) {
749       nexti = jl[i]; /* next row to be added to k_th row */
750 
751       /* compute multiplier */
752       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
753 
754       /* uik = -inv(Di)*U_bar(i,k) */
755       diag = ba + i*bs2;
756       u    = ba + ili*bs2;
757       PetscCall(PetscArrayzero(uik,bs2));
758       PetscKernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
759 
760       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
761       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
762       PetscCall(PetscLogFlops(2.0*bs*bs2));
763 
764       /* update -U(i,k) */
765       PetscCall(PetscArraycpy(ba+ili*bs2,uik,bs2));
766 
767       /* add multiple of row i to k-th row ... */
768       jmin = ili + 1; jmax = bi[i+1];
769       if (jmin < jmax) {
770         for (j=jmin; j<jmax; j++) {
771           /* rtmp += -U(i,k)^T * U_bar(i,j) */
772           rtmp_ptr = rtmp + bj[j]*bs2;
773           u        = ba + j*bs2;
774           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
775         }
776         PetscCall(PetscLogFlops(2.0*bs*bs2*(jmax-jmin)));
777 
778         /* ... add i to row list for next nonzero entry */
779         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
780         j     = bj[jmin];
781         jl[i] = jl[j]; jl[j] = i; /* update jl */
782       }
783       i = nexti;
784     }
785 
786     /* save nonzero entries in k-th row of U ... */
787 
788     /* invert diagonal block */
789     diag = ba+k*bs2;
790     PetscCall(PetscArraycpy(diag,dk,bs2));
791 
792     PetscCall(PetscKernel_A_gets_inverse_A(bs,diag,pivots,work,allowzeropivot,&zeropivotdetected));
793     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
794 
795     jmin = bi[k]; jmax = bi[k+1];
796     if (jmin < jmax) {
797       for (j=jmin; j<jmax; j++) {
798         vj       = bj[j];      /* block col. index of U */
799         u        = ba + j*bs2;
800         rtmp_ptr = rtmp + vj*bs2;
801         for (k1=0; k1<bs2; k1++) {
802           *u++        = *rtmp_ptr;
803           *rtmp_ptr++ = 0.0;
804         }
805       }
806 
807       /* ... add k to row list for first nonzero entry in k-th row */
808       il[k] = jmin;
809       i     = bj[jmin];
810       jl[k] = jl[i]; jl[i] = k;
811     }
812   }
813 
814   PetscCall(PetscFree(rtmp));
815   PetscCall(PetscFree2(il,jl));
816   PetscCall(PetscFree3(dk,uik,work));
817   PetscCall(PetscFree(pivots));
818 
819   C->ops->solve          = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
820   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
821   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
822   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
823   C->assembled           = PETSC_TRUE;
824   C->preallocated        = PETSC_TRUE;
825 
826   PetscCall(PetscLogFlops(1.3333*bs*bs2*b->mbs)); /* from inverting diagonal blocks */
827   PetscFunctionReturn(0);
828 }
829 
830 /*
831     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
832     Version for blocks 2 by 2.
833 */
834 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C,Mat A,const MatFactorInfo *info)
835 {
836   Mat_SeqSBAIJ   *a   = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)C->data;
837   IS             perm = b->row;
838   const PetscInt *ai,*aj,*perm_ptr;
839   PetscInt       i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
840   PetscInt       *a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
841   MatScalar      *ba = b->a,*aa,*ap;
842   MatScalar      *u,*diag,*rtmp,*rtmp_ptr,dk[4],uik[4];
843   PetscReal      shift = info->shiftamount;
844   PetscBool      allowzeropivot,zeropivotdetected;
845 
846   PetscFunctionBegin;
847   allowzeropivot = PetscNot(A->erroriffailure);
848 
849   /* initialization */
850   /* il and jl record the first nonzero element in each row of the accessing
851      window U(0:k, k:mbs-1).
852      jl:    list of rows to be added to uneliminated rows
853             i>= k: jl(i) is the first row to be added to row i
854             i<  k: jl(i) is the row following row i in some list of rows
855             jl(i) = mbs indicates the end of a list
856      il(i): points to the first nonzero element in columns k,...,mbs-1 of
857             row i of U */
858   PetscCall(PetscCalloc1(4*mbs,&rtmp));
859   PetscCall(PetscMalloc2(mbs,&il,mbs,&jl));
860   il[0] = 0;
861   for (i=0; i<mbs; i++) jl[i] = mbs;
862 
863   PetscCall(ISGetIndices(perm,&perm_ptr));
864 
865   /* check permutation */
866   if (!a->permute) {
867     ai = a->i; aj = a->j; aa = a->a;
868   } else {
869     ai   = a->inew; aj = a->jnew;
870     PetscCall(PetscMalloc1(4*ai[mbs],&aa));
871     PetscCall(PetscArraycpy(aa,a->a,4*ai[mbs]));
872     PetscCall(PetscMalloc1(ai[mbs],&a2anew));
873     PetscCall(PetscArraycpy(a2anew,a->a2anew,ai[mbs]));
874 
875     for (i=0; i<mbs; i++) {
876       jmin = ai[i]; jmax = ai[i+1];
877       for (j=jmin; j<jmax; j++) {
878         while (a2anew[j] != j) {
879           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
880           for (k1=0; k1<4; k1++) {
881             dk[k1]     = aa[k*4+k1];
882             aa[k*4+k1] = aa[j*4+k1];
883             aa[j*4+k1] = dk[k1];
884           }
885         }
886         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
887         if (i > aj[j]) {
888           ap    = aa + j*4;  /* ptr to the beginning of the block */
889           dk[1] = ap[1];     /* swap ap[1] and ap[2] */
890           ap[1] = ap[2];
891           ap[2] = dk[1];
892         }
893       }
894     }
895     PetscCall(PetscFree(a2anew));
896   }
897 
898   /* for each row k */
899   for (k = 0; k<mbs; k++) {
900 
901     /*initialize k-th row with elements nonzero in row perm(k) of A */
902     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
903     ap   = aa + jmin*4;
904     for (j = jmin; j < jmax; j++) {
905       vj       = perm_ptr[aj[j]];   /* block col. index */
906       rtmp_ptr = rtmp + vj*4;
907       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
908     }
909 
910     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
911     PetscCall(PetscArraycpy(dk,rtmp+k*4,4));
912     i    = jl[k]; /* first row to be added to k_th row  */
913 
914     while (i < k) {
915       nexti = jl[i]; /* next row to be added to k_th row */
916 
917       /* compute multiplier */
918       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
919 
920       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
921       diag   = ba + i*4;
922       u      = ba + ili*4;
923       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
924       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
925       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
926       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
927 
928       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
929       dk[0] += uik[0]*u[0] + uik[1]*u[1];
930       dk[1] += uik[2]*u[0] + uik[3]*u[1];
931       dk[2] += uik[0]*u[2] + uik[1]*u[3];
932       dk[3] += uik[2]*u[2] + uik[3]*u[3];
933 
934       PetscCall(PetscLogFlops(16.0*2.0));
935 
936       /* update -U(i,k): ba[ili] = uik */
937       PetscCall(PetscArraycpy(ba+ili*4,uik,4));
938 
939       /* add multiple of row i to k-th row ... */
940       jmin = ili + 1; jmax = bi[i+1];
941       if (jmin < jmax) {
942         for (j=jmin; j<jmax; j++) {
943           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
944           rtmp_ptr     = rtmp + bj[j]*4;
945           u            = ba + j*4;
946           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
947           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
948           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
949           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
950         }
951         PetscCall(PetscLogFlops(16.0*(jmax-jmin)));
952 
953         /* ... add i to row list for next nonzero entry */
954         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
955         j     = bj[jmin];
956         jl[i] = jl[j]; jl[j] = i; /* update jl */
957       }
958       i = nexti;
959     }
960 
961     /* save nonzero entries in k-th row of U ... */
962 
963     /* invert diagonal block */
964     diag = ba+k*4;
965     PetscCall(PetscArraycpy(diag,dk,4));
966     PetscCall(PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected));
967     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
968 
969     jmin = bi[k]; jmax = bi[k+1];
970     if (jmin < jmax) {
971       for (j=jmin; j<jmax; j++) {
972         vj       = bj[j];      /* block col. index of U */
973         u        = ba + j*4;
974         rtmp_ptr = rtmp + vj*4;
975         for (k1=0; k1<4; k1++) {
976           *u++        = *rtmp_ptr;
977           *rtmp_ptr++ = 0.0;
978         }
979       }
980 
981       /* ... add k to row list for first nonzero entry in k-th row */
982       il[k] = jmin;
983       i     = bj[jmin];
984       jl[k] = jl[i]; jl[i] = k;
985     }
986   }
987 
988   PetscCall(PetscFree(rtmp));
989   PetscCall(PetscFree2(il,jl));
990   if (a->permute) PetscCall(PetscFree(aa));
991   PetscCall(ISRestoreIndices(perm,&perm_ptr));
992 
993   C->ops->solve          = MatSolve_SeqSBAIJ_2_inplace;
994   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace;
995   C->assembled           = PETSC_TRUE;
996   C->preallocated        = PETSC_TRUE;
997 
998   PetscCall(PetscLogFlops(1.3333*8*b->mbs)); /* from inverting diagonal blocks */
999   PetscFunctionReturn(0);
1000 }
1001 
1002 /*
1003       Version for when blocks are 2 by 2 Using natural ordering
1004 */
1005 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info)
1006 {
1007   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)C->data;
1008   PetscInt       i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
1009   PetscInt       *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
1010   MatScalar      *ba = b->a,*aa,*ap,dk[8],uik[8];
1011   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
1012   PetscReal      shift = info->shiftamount;
1013   PetscBool      allowzeropivot,zeropivotdetected;
1014 
1015   PetscFunctionBegin;
1016   allowzeropivot = PetscNot(A->erroriffailure);
1017 
1018   /* initialization */
1019   /* il and jl record the first nonzero element in each row of the accessing
1020      window U(0:k, k:mbs-1).
1021      jl:    list of rows to be added to uneliminated rows
1022             i>= k: jl(i) is the first row to be added to row i
1023             i<  k: jl(i) is the row following row i in some list of rows
1024             jl(i) = mbs indicates the end of a list
1025      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1026             row i of U */
1027   PetscCall(PetscCalloc1(4*mbs,&rtmp));
1028   PetscCall(PetscMalloc2(mbs,&il,mbs,&jl));
1029   il[0] = 0;
1030   for (i=0; i<mbs; i++) jl[i] = mbs;
1031 
1032   ai = a->i; aj = a->j; aa = a->a;
1033 
1034   /* for each row k */
1035   for (k = 0; k<mbs; k++) {
1036 
1037     /*initialize k-th row with elements nonzero in row k of A */
1038     jmin = ai[k]; jmax = ai[k+1];
1039     ap   = aa + jmin*4;
1040     for (j = jmin; j < jmax; j++) {
1041       vj       = aj[j];   /* block col. index */
1042       rtmp_ptr = rtmp + vj*4;
1043       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
1044     }
1045 
1046     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1047     PetscCall(PetscArraycpy(dk,rtmp+k*4,4));
1048     i    = jl[k]; /* first row to be added to k_th row  */
1049 
1050     while (i < k) {
1051       nexti = jl[i]; /* next row to be added to k_th row */
1052 
1053       /* compute multiplier */
1054       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1055 
1056       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1057       diag   = ba + i*4;
1058       u      = ba + ili*4;
1059       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
1060       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
1061       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
1062       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
1063 
1064       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1065       dk[0] += uik[0]*u[0] + uik[1]*u[1];
1066       dk[1] += uik[2]*u[0] + uik[3]*u[1];
1067       dk[2] += uik[0]*u[2] + uik[1]*u[3];
1068       dk[3] += uik[2]*u[2] + uik[3]*u[3];
1069 
1070       PetscCall(PetscLogFlops(16.0*2.0));
1071 
1072       /* update -U(i,k): ba[ili] = uik */
1073       PetscCall(PetscArraycpy(ba+ili*4,uik,4));
1074 
1075       /* add multiple of row i to k-th row ... */
1076       jmin = ili + 1; jmax = bi[i+1];
1077       if (jmin < jmax) {
1078         for (j=jmin; j<jmax; j++) {
1079           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1080           rtmp_ptr     = rtmp + bj[j]*4;
1081           u            = ba + j*4;
1082           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
1083           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
1084           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
1085           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
1086         }
1087         PetscCall(PetscLogFlops(16.0*(jmax-jmin)));
1088 
1089         /* ... add i to row list for next nonzero entry */
1090         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1091         j     = bj[jmin];
1092         jl[i] = jl[j]; jl[j] = i; /* update jl */
1093       }
1094       i = nexti;
1095     }
1096 
1097     /* save nonzero entries in k-th row of U ... */
1098 
1099     /* invert diagonal block */
1100     diag = ba+k*4;
1101     PetscCall(PetscArraycpy(diag,dk,4));
1102     PetscCall(PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected));
1103     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1104 
1105     jmin = bi[k]; jmax = bi[k+1];
1106     if (jmin < jmax) {
1107       for (j=jmin; j<jmax; j++) {
1108         vj       = bj[j];      /* block col. index of U */
1109         u        = ba + j*4;
1110         rtmp_ptr = rtmp + vj*4;
1111         for (k1=0; k1<4; k1++) {
1112           *u++        = *rtmp_ptr;
1113           *rtmp_ptr++ = 0.0;
1114         }
1115       }
1116 
1117       /* ... add k to row list for first nonzero entry in k-th row */
1118       il[k] = jmin;
1119       i     = bj[jmin];
1120       jl[k] = jl[i]; jl[i] = k;
1121     }
1122   }
1123 
1124   PetscCall(PetscFree(rtmp));
1125   PetscCall(PetscFree2(il,jl));
1126 
1127   C->ops->solve          = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1128   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1129   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1130   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1131   C->assembled           = PETSC_TRUE;
1132   C->preallocated        = PETSC_TRUE;
1133 
1134   PetscCall(PetscLogFlops(1.3333*8*b->mbs)); /* from inverting diagonal blocks */
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 /*
1139     Numeric U^T*D*U factorization for SBAIJ format.
1140     Version for blocks are 1 by 1.
1141 */
1142 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C,Mat A,const MatFactorInfo *info)
1143 {
1144   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ*)C->data;
1145   IS             ip=b->row;
1146   const PetscInt *ai,*aj,*rip;
1147   PetscInt       *a2anew,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j,*bcol;
1148   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1149   MatScalar      *rtmp,*ba=b->a,*bval,*aa,dk,uikdi;
1150   PetscReal      rs;
1151   FactorShiftCtx sctx;
1152 
1153   PetscFunctionBegin;
1154   /* MatPivotSetUp(): initialize shift context sctx */
1155   PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx)));
1156 
1157   PetscCall(ISGetIndices(ip,&rip));
1158   if (!a->permute) {
1159     ai = a->i; aj = a->j; aa = a->a;
1160   } else {
1161     ai     = a->inew; aj = a->jnew;
1162     nz     = ai[mbs];
1163     PetscCall(PetscMalloc1(nz,&aa));
1164     a2anew = a->a2anew;
1165     bval   = a->a;
1166     for (j=0; j<nz; j++) {
1167       aa[a2anew[j]] = *(bval++);
1168     }
1169   }
1170 
1171   /* initialization */
1172   /* il and jl record the first nonzero element in each row of the accessing
1173      window U(0:k, k:mbs-1).
1174      jl:    list of rows to be added to uneliminated rows
1175             i>= k: jl(i) is the first row to be added to row i
1176             i<  k: jl(i) is the row following row i in some list of rows
1177             jl(i) = mbs indicates the end of a list
1178      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1179             row i of U */
1180   PetscCall(PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl));
1181 
1182   do {
1183     sctx.newshift = PETSC_FALSE;
1184     il[0] = 0;
1185     for (i=0; i<mbs; i++) {
1186       rtmp[i] = 0.0; jl[i] = mbs;
1187     }
1188 
1189     for (k = 0; k<mbs; k++) {
1190       /*initialize k-th row by the perm[k]-th row of A */
1191       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1192       bval = ba + bi[k];
1193       for (j = jmin; j < jmax; j++) {
1194         col       = rip[aj[j]];
1195         rtmp[col] = aa[j];
1196         *bval++   = 0.0; /* for in-place factorization */
1197       }
1198 
1199       /* shift the diagonal of the matrix */
1200       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1201 
1202       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1203       dk = rtmp[k];
1204       i  = jl[k]; /* first row to be added to k_th row  */
1205 
1206       while (i < k) {
1207         nexti = jl[i]; /* next row to be added to k_th row */
1208 
1209         /* compute multiplier, update diag(k) and U(i,k) */
1210         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1211         uikdi   = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
1212         dk     += uikdi*ba[ili];
1213         ba[ili] = uikdi; /* -U(i,k) */
1214 
1215         /* add multiple of row i to k-th row */
1216         jmin = ili + 1; jmax = bi[i+1];
1217         if (jmin < jmax) {
1218           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1219           PetscCall(PetscLogFlops(2.0*(jmax-jmin)));
1220 
1221           /* update il and jl for row i */
1222           il[i] = jmin;
1223           j     = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1224         }
1225         i = nexti;
1226       }
1227 
1228       /* shift the diagonals when zero pivot is detected */
1229       /* compute rs=sum of abs(off-diagonal) */
1230       rs   = 0.0;
1231       jmin = bi[k]+1;
1232       nz   = bi[k+1] - jmin;
1233       if (nz) {
1234         bcol = bj + jmin;
1235         while (nz--) {
1236           rs += PetscAbsScalar(rtmp[*bcol]);
1237           bcol++;
1238         }
1239       }
1240 
1241       sctx.rs = rs;
1242       sctx.pv = dk;
1243       PetscCall(MatPivotCheck(C,A,info,&sctx,k));
1244       if (sctx.newshift) break;    /* sctx.shift_amount is updated */
1245       dk = sctx.pv;
1246 
1247       /* copy data into U(k,:) */
1248       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1249       jmin      = bi[k]+1; jmax = bi[k+1];
1250       if (jmin < jmax) {
1251         for (j=jmin; j<jmax; j++) {
1252           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1253         }
1254         /* add the k-th row into il and jl */
1255         il[k] = jmin;
1256         i     = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1257       }
1258     }
1259   } while (sctx.newshift);
1260   PetscCall(PetscFree3(rtmp,il,jl));
1261   if (a->permute) PetscCall(PetscFree(aa));
1262 
1263   PetscCall(ISRestoreIndices(ip,&rip));
1264 
1265   C->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
1266   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1267   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
1268   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
1269   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
1270   C->assembled           = PETSC_TRUE;
1271   C->preallocated        = PETSC_TRUE;
1272 
1273   PetscCall(PetscLogFlops(C->rmap->N));
1274   if (sctx.nshift) {
1275     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1276       PetscCall(PetscInfo(A,"number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount));
1277     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1278       PetscCall(PetscInfo(A,"number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount));
1279     }
1280   }
1281   PetscFunctionReturn(0);
1282 }
1283 
1284 /*
1285   Version for when blocks are 1 by 1 Using natural ordering under new datastructure
1286   Modified from MatCholeskyFactorNumeric_SeqAIJ()
1287 */
1288 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B,Mat A,const MatFactorInfo *info)
1289 {
1290   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1291   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)B->data;
1292   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
1293   PetscInt       *ai=a->i,*aj=a->j,*ajtmp;
1294   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
1295   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1296   FactorShiftCtx sctx;
1297   PetscReal      rs;
1298   MatScalar      d,*v;
1299 
1300   PetscFunctionBegin;
1301   PetscCall(PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r));
1302 
1303   /* MatPivotSetUp(): initialize shift context sctx */
1304   PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx)));
1305 
1306   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1307     sctx.shift_top = info->zeropivot;
1308 
1309     PetscCall(PetscArrayzero(rtmp,mbs));
1310 
1311     for (i=0; i<mbs; i++) {
1312       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1313       d        = (aa)[a->diag[i]];
1314       rtmp[i] += -PetscRealPart(d);  /* diagonal entry */
1315       ajtmp    = aj + ai[i] + 1;     /* exclude diagonal */
1316       v        = aa + ai[i] + 1;
1317       nz       = ai[i+1] - ai[i] - 1;
1318       for (j=0; j<nz; j++) {
1319         rtmp[i]        += PetscAbsScalar(v[j]);
1320         rtmp[ajtmp[j]] += PetscAbsScalar(v[j]);
1321       }
1322       if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]);
1323     }
1324     sctx.shift_top *= 1.1;
1325     sctx.nshift_max = 5;
1326     sctx.shift_lo   = 0.;
1327     sctx.shift_hi   = 1.;
1328   }
1329 
1330   /* allocate working arrays
1331      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1332      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1333   */
1334   do {
1335     sctx.newshift = PETSC_FALSE;
1336 
1337     for (i=0; i<mbs; i++) c2r[i] = mbs;
1338     if (mbs) il[0] = 0;
1339 
1340     for (k = 0; k<mbs; k++) {
1341       /* zero rtmp */
1342       nz    = bi[k+1] - bi[k];
1343       bjtmp = bj + bi[k];
1344       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
1345 
1346       /* load in initial unfactored row */
1347       bval = ba + bi[k];
1348       jmin = ai[k]; jmax = ai[k+1];
1349       for (j = jmin; j < jmax; j++) {
1350         col       = aj[j];
1351         rtmp[col] = aa[j];
1352         *bval++   = 0.0; /* for in-place factorization */
1353       }
1354       /* shift the diagonal of the matrix: ZeropivotApply() */
1355       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */
1356 
1357       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1358       dk = rtmp[k];
1359       i  = c2r[k]; /* first row to be added to k_th row  */
1360 
1361       while (i < k) {
1362         nexti = c2r[i]; /* next row to be added to k_th row */
1363 
1364         /* compute multiplier, update diag(k) and U(i,k) */
1365         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1366         uikdi   = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
1367         dk     += uikdi*ba[ili]; /* update diag[k] */
1368         ba[ili] = uikdi; /* -U(i,k) */
1369 
1370         /* add multiple of row i to k-th row */
1371         jmin = ili + 1; jmax = bi[i+1];
1372         if (jmin < jmax) {
1373           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1374           /* update il and c2r for row i */
1375           il[i] = jmin;
1376           j     = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
1377         }
1378         i = nexti;
1379       }
1380 
1381       /* copy data into U(k,:) */
1382       rs   = 0.0;
1383       jmin = bi[k]; jmax = bi[k+1]-1;
1384       if (jmin < jmax) {
1385         for (j=jmin; j<jmax; j++) {
1386           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
1387         }
1388         /* add the k-th row into il and c2r */
1389         il[k] = jmin;
1390         i     = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
1391       }
1392 
1393       sctx.rs = rs;
1394       sctx.pv = dk;
1395       PetscCall(MatPivotCheck(B,A,info,&sctx,k));
1396       if (sctx.newshift) break;
1397       dk = sctx.pv;
1398 
1399       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
1400     }
1401   } while (sctx.newshift);
1402 
1403   PetscCall(PetscFree3(rtmp,il,c2r));
1404 
1405   B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1406   B->ops->solves         = MatSolves_SeqSBAIJ_1;
1407   B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1408   B->ops->matsolve       = MatMatSolve_SeqSBAIJ_1_NaturalOrdering;
1409   B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1410   B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1411 
1412   B->assembled    = PETSC_TRUE;
1413   B->preallocated = PETSC_TRUE;
1414 
1415   PetscCall(PetscLogFlops(B->rmap->n));
1416 
1417   /* MatPivotView() */
1418   if (sctx.nshift) {
1419     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1420       PetscCall(PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top));
1421     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1422       PetscCall(PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount));
1423     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1424       PetscCall(PetscInfo(A,"number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n",sctx.nshift,(double)info->shiftamount));
1425     }
1426   }
1427   PetscFunctionReturn(0);
1428 }
1429 
1430 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C,Mat A,const MatFactorInfo *info)
1431 {
1432   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ*)C->data;
1433   PetscInt       i,j,mbs = a->mbs;
1434   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
1435   PetscInt       k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz;
1436   MatScalar      *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
1437   PetscReal      rs;
1438   FactorShiftCtx sctx;
1439 
1440   PetscFunctionBegin;
1441   /* MatPivotSetUp(): initialize shift context sctx */
1442   PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx)));
1443 
1444   /* initialization */
1445   /* il and jl record the first nonzero element in each row of the accessing
1446      window U(0:k, k:mbs-1).
1447      jl:    list of rows to be added to uneliminated rows
1448             i>= k: jl(i) is the first row to be added to row i
1449             i<  k: jl(i) is the row following row i in some list of rows
1450             jl(i) = mbs indicates the end of a list
1451      il(i): points to the first nonzero element in U(i,k:mbs-1)
1452   */
1453   PetscCall(PetscMalloc1(mbs,&rtmp));
1454   PetscCall(PetscMalloc2(mbs,&il,mbs,&jl));
1455 
1456   do {
1457     sctx.newshift = PETSC_FALSE;
1458     il[0] = 0;
1459     for (i=0; i<mbs; i++) {
1460       rtmp[i] = 0.0; jl[i] = mbs;
1461     }
1462 
1463     for (k = 0; k<mbs; k++) {
1464       /*initialize k-th row with elements nonzero in row perm(k) of A */
1465       nz   = ai[k+1] - ai[k];
1466       acol = aj + ai[k];
1467       aval = aa + ai[k];
1468       bval = ba + bi[k];
1469       while (nz--) {
1470         rtmp[*acol++] = *aval++;
1471         *bval++       = 0.0; /* for in-place factorization */
1472       }
1473 
1474       /* shift the diagonal of the matrix */
1475       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1476 
1477       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1478       dk = rtmp[k];
1479       i  = jl[k]; /* first row to be added to k_th row  */
1480 
1481       while (i < k) {
1482         nexti = jl[i]; /* next row to be added to k_th row */
1483         /* compute multiplier, update D(k) and U(i,k) */
1484         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1485         uikdi   = -ba[ili]*ba[bi[i]];
1486         dk     += uikdi*ba[ili];
1487         ba[ili] = uikdi; /* -U(i,k) */
1488 
1489         /* add multiple of row i to k-th row ... */
1490         jmin = ili + 1;
1491         nz   = bi[i+1] - jmin;
1492         if (nz > 0) {
1493           bcol = bj + jmin;
1494           bval = ba + jmin;
1495           PetscCall(PetscLogFlops(2.0*nz));
1496           while (nz--) rtmp[*bcol++] += uikdi*(*bval++);
1497 
1498           /* update il and jl for i-th row */
1499           il[i] = jmin;
1500           j     = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1501         }
1502         i = nexti;
1503       }
1504 
1505       /* shift the diagonals when zero pivot is detected */
1506       /* compute rs=sum of abs(off-diagonal) */
1507       rs   = 0.0;
1508       jmin = bi[k]+1;
1509       nz   = bi[k+1] - jmin;
1510       if (nz) {
1511         bcol = bj + jmin;
1512         while (nz--) {
1513           rs += PetscAbsScalar(rtmp[*bcol]);
1514           bcol++;
1515         }
1516       }
1517 
1518       sctx.rs = rs;
1519       sctx.pv = dk;
1520       PetscCall(MatPivotCheck(C,A,info,&sctx,k));
1521       if (sctx.newshift) break;    /* sctx.shift_amount is updated */
1522       dk = sctx.pv;
1523 
1524       /* copy data into U(k,:) */
1525       ba[bi[k]] = 1.0/dk;
1526       jmin      = bi[k]+1;
1527       nz        = bi[k+1] - jmin;
1528       if (nz) {
1529         bcol = bj + jmin;
1530         bval = ba + jmin;
1531         while (nz--) {
1532           *bval++       = rtmp[*bcol];
1533           rtmp[*bcol++] = 0.0;
1534         }
1535         /* add k-th row into il and jl */
1536         il[k] = jmin;
1537         i     = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1538       }
1539     } /* end of for (k = 0; k<mbs; k++) */
1540   } while (sctx.newshift);
1541   PetscCall(PetscFree(rtmp));
1542   PetscCall(PetscFree2(il,jl));
1543 
1544   C->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1545   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1546   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1547   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1548   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1549 
1550   C->assembled    = PETSC_TRUE;
1551   C->preallocated = PETSC_TRUE;
1552 
1553   PetscCall(PetscLogFlops(C->rmap->N));
1554   if (sctx.nshift) {
1555     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1556       PetscCall(PetscInfo(A,"number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount));
1557     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1558       PetscCall(PetscInfo(A,"number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount));
1559     }
1560   }
1561   PetscFunctionReturn(0);
1562 }
1563 
1564 PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,const MatFactorInfo *info)
1565 {
1566   Mat            C;
1567 
1568   PetscFunctionBegin;
1569   PetscCall(MatGetFactor(A,"petsc",MAT_FACTOR_CHOLESKY,&C));
1570   PetscCall(MatCholeskyFactorSymbolic(C,A,perm,info));
1571   PetscCall(MatCholeskyFactorNumeric(C,A,info));
1572 
1573   A->ops->solve          = C->ops->solve;
1574   A->ops->solvetranspose = C->ops->solvetranspose;
1575 
1576   PetscCall(MatHeaderMerge(A,&C));
1577   PetscFunctionReturn(0);
1578 }
1579