xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact.c (revision 503c0ea9b45bcfbcebbb1ea5341243bbc69f0bea)
1 
2 #include <../src/mat/impls/baij/seq/baij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petsc/private/kernels/blockinvert.h>
5 #include <petscis.h>
6 
7 PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
8 {
9   Mat_SeqSBAIJ *fact=(Mat_SeqSBAIJ*)F->data;
10   MatScalar    *dd=fact->a;
11   PetscInt     mbs=fact->mbs,bs=F->rmap->bs,i,nneg_tmp,npos_tmp,*fi=fact->diag;
12 
13   PetscFunctionBegin;
14   PetscCheck(bs == 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for bs: %" PetscInt_FMT " >1 yet",bs);
15 
16   nneg_tmp = 0; npos_tmp = 0;
17   for (i=0; i<mbs; i++) {
18     if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++;
19     else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++;
20     fi++;
21   }
22   if (nneg)  *nneg  = nneg_tmp;
23   if (npos)  *npos  = npos_tmp;
24   if (nzero) *nzero = mbs - nneg_tmp - npos_tmp;
25   PetscFunctionReturn(0);
26 }
27 
28 /*
29   Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
30   Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad.
31 */
32 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F,Mat A,IS perm,const MatFactorInfo *info)
33 {
34   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b;
35   const PetscInt *rip,*ai,*aj;
36   PetscInt       i,mbs = a->mbs,*jutmp,bs = A->rmap->bs,bs2=a->bs2;
37   PetscInt       m,reallocs = 0,prow;
38   PetscInt       *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
39   PetscReal      f = info->fill;
40   PetscBool      perm_identity;
41 
42   PetscFunctionBegin;
43   /* check whether perm is the identity mapping */
44   PetscCall(ISIdentity(perm,&perm_identity));
45   PetscCall(ISGetIndices(perm,&rip));
46 
47   if (perm_identity) { /* without permutation */
48     a->permute = PETSC_FALSE;
49 
50     ai = a->i; aj = a->j;
51   } else {            /* non-trivial permutation */
52     a->permute = PETSC_TRUE;
53 
54     PetscCall(MatReorderingSeqSBAIJ(A,perm));
55 
56     ai = a->inew; aj = a->jnew;
57   }
58 
59   /* initialization */
60   PetscCall(PetscMalloc1(mbs+1,&iu));
61   umax  = (PetscInt)(f*ai[mbs] + 1); umax += mbs + 1;
62   PetscCall(PetscMalloc1(umax,&ju));
63   iu[0] = mbs+1;
64   juidx = mbs + 1; /* index for ju */
65   /* jl linked list for pivot row -- linked list for col index */
66   PetscCall(PetscMalloc2(mbs,&jl,mbs,&q));
67   for (i=0; i<mbs; i++) {
68     jl[i] = mbs;
69     q[i]  = 0;
70   }
71 
72   /* for each row k */
73   for (k=0; k<mbs; k++) {
74     for (i=0; i<mbs; i++) q[i] = 0;  /* to be removed! */
75     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
76     q[k] = mbs;
77     /* initialize nonzero structure of k-th row to row rip[k] of A */
78     jmin = ai[rip[k]] +1; /* exclude diag[k] */
79     jmax = ai[rip[k]+1];
80     for (j=jmin; j<jmax; j++) {
81       vj = rip[aj[j]]; /* col. value */
82       if (vj > k) {
83         qm = k;
84         do {
85           m = qm; qm = q[m];
86         } while (qm < vj);
87         PetscCheck(qm != vj,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Duplicate entry in A");
88         nzk++;
89         q[m]  = vj;
90         q[vj] = qm;
91       } /* if (vj > k) */
92     } /* for (j=jmin; j<jmax; j++) */
93 
94     /* modify nonzero structure of k-th row by computing fill-in
95        for each row i to be merged in */
96     prow = k;
97     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
98 
99     while (prow < k) {
100       /* merge row prow into k-th row */
101       jmin = iu[prow] + 1; jmax = iu[prow+1];
102       qm   = k;
103       for (j=jmin; j<jmax; j++) {
104         vj = ju[j];
105         do {
106           m = qm; qm = q[m];
107         } while (qm < vj);
108         if (qm != vj) {
109           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
110         }
111       }
112       prow = jl[prow]; /* next pivot row */
113     }
114 
115     /* add k to row list for first nonzero element in k-th row */
116     if (nzk > 0) {
117       i     = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
118       jl[k] = jl[i]; jl[i] = k;
119     }
120     iu[k+1] = iu[k] + nzk;
121 
122     /* allocate more space to ju if needed */
123     if (iu[k+1] > umax) {
124       /* estimate how much additional space we will need */
125       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
126       /* just double the memory each time */
127       maxadd = umax;
128       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
129       umax += maxadd;
130 
131       /* allocate a longer ju */
132       PetscCall(PetscMalloc1(umax,&jutmp));
133       PetscCall(PetscArraycpy(jutmp,ju,iu[k]));
134       PetscCall(PetscFree(ju));
135       ju   = jutmp;
136       reallocs++; /* count how many times we realloc */
137     }
138 
139     /* save nonzero structure of k-th row in ju */
140     i=k;
141     while (nzk--) {
142       i           = q[i];
143       ju[juidx++] = i;
144     }
145   }
146 
147 #if defined(PETSC_USE_INFO)
148   if (ai[mbs] != 0) {
149     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
150     PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af));
151     PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af));
152     PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g);\n",(double)af));
153     PetscCall(PetscInfo(A,"for best performance.\n"));
154   } else {
155     PetscCall(PetscInfo(A,"Empty matrix\n"));
156   }
157 #endif
158 
159   PetscCall(ISRestoreIndices(perm,&rip));
160   PetscCall(PetscFree2(jl,q));
161 
162   /* put together the new matrix */
163   PetscCall(MatSeqSBAIJSetPreallocation(F,bs,MAT_SKIP_ALLOCATION,NULL));
164 
165   /* PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)iperm)); */
166   b                = (Mat_SeqSBAIJ*)(F)->data;
167   b->singlemalloc  = PETSC_FALSE;
168   b->free_a        = PETSC_TRUE;
169   b->free_ij       = PETSC_TRUE;
170 
171   PetscCall(PetscMalloc1((iu[mbs]+1)*bs2,&b->a));
172   b->j    = ju;
173   b->i    = iu;
174   b->diag = NULL;
175   b->ilen = NULL;
176   b->imax = NULL;
177   b->row  = perm;
178 
179   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
180 
181   PetscCall(PetscObjectReference((PetscObject)perm));
182 
183   b->icol = perm;
184   PetscCall(PetscObjectReference((PetscObject)perm));
185   PetscCall(PetscMalloc1(bs*mbs+bs,&b->solve_work));
186   /* In b structure:  Free imax, ilen, old a, old j.
187      Allocate idnew, solve_work, new a, new j */
188   PetscCall(PetscLogObjectMemory((PetscObject)F,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))));
189   b->maxnz = b->nz = iu[mbs];
190 
191   (F)->info.factor_mallocs   = reallocs;
192   (F)->info.fill_ratio_given = f;
193   if (ai[mbs] != 0) {
194     (F)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
195   } else {
196     (F)->info.fill_ratio_needed = 0.0;
197   }
198   PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(F,perm_identity));
199   PetscFunctionReturn(0);
200 }
201 /*
202     Symbolic U^T*D*U factorization for SBAIJ format.
203     See MatICCFactorSymbolic_SeqAIJ() for description of its data structure.
204 */
205 #include <petscbt.h>
206 #include <../src/mat/utils/freespace.h>
207 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
208 {
209   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data;
210   Mat_SeqSBAIJ       *b;
211   PetscBool          perm_identity,missing;
212   PetscReal          fill = info->fill;
213   const PetscInt     *rip,*ai=a->i,*aj=a->j;
214   PetscInt           i,mbs=a->mbs,bs=A->rmap->bs,reallocs=0,prow;
215   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
216   PetscInt           nlnk,*lnk,ncols,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
217   PetscFreeSpaceList free_space=NULL,current_space=NULL;
218   PetscBT            lnkbt;
219 
220   PetscFunctionBegin;
221   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n);
222   PetscCall(MatMissingDiagonal(A,&missing,&i));
223   PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i);
224   if (bs > 1) {
225     PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact,A,perm,info));
226     PetscFunctionReturn(0);
227   }
228 
229   /* check whether perm is the identity mapping */
230   PetscCall(ISIdentity(perm,&perm_identity));
231   PetscCheck(perm_identity,PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format");
232   a->permute = PETSC_FALSE;
233   PetscCall(ISGetIndices(perm,&rip));
234 
235   /* initialization */
236   PetscCall(PetscMalloc1(mbs+1,&ui));
237   PetscCall(PetscMalloc1(mbs+1,&udiag));
238   ui[0] = 0;
239 
240   /* jl: linked list for storing indices of the pivot rows
241      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
242   PetscCall(PetscMalloc4(mbs,&ui_ptr,mbs,&il,mbs,&jl,mbs,&cols));
243   for (i=0; i<mbs; i++) {
244     jl[i] = mbs; il[i] = 0;
245   }
246 
247   /* create and initialize a linked list for storing column indices of the active row k */
248   nlnk = mbs + 1;
249   PetscCall(PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt));
250 
251   /* initial FreeSpace size is fill*(ai[mbs]+1) */
252   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[mbs]+1),&free_space));
253   current_space = free_space;
254 
255   for (k=0; k<mbs; k++) {  /* for each active row k */
256     /* initialize lnk by the column indices of row rip[k] of A */
257     nzk   = 0;
258     ncols = ai[k+1] - ai[k];
259     PetscCheck(ncols,PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row %" PetscInt_FMT " in matrix ",k);
260     for (j=0; j<ncols; j++) {
261       i       = *(aj + ai[k] + j);
262       cols[j] = i;
263     }
264     PetscCall(PetscLLAdd(ncols,cols,mbs,&nlnk,lnk,lnkbt));
265     nzk += nlnk;
266 
267     /* update lnk by computing fill-in for each pivot row to be merged in */
268     prow = jl[k]; /* 1st pivot row */
269 
270     while (prow < k) {
271       nextprow = jl[prow];
272       /* merge prow into k-th row */
273       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
274       jmax   = ui[prow+1];
275       ncols  = jmax-jmin;
276       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
277       PetscCall(PetscLLAddSorted(ncols,uj_ptr,mbs,&nlnk,lnk,lnkbt));
278       nzk   += nlnk;
279 
280       /* update il and jl for prow */
281       if (jmin < jmax) {
282         il[prow] = jmin;
283         j        = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
284       }
285       prow = nextprow;
286     }
287 
288     /* if free space is not available, make more free space */
289     if (current_space->local_remaining<nzk) {
290       i    = mbs - k + 1; /* num of unfactored rows */
291       i    = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
292       PetscCall(PetscFreeSpaceGet(i,&current_space));
293       reallocs++;
294     }
295 
296     /* copy data into free space, then initialize lnk */
297     PetscCall(PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt));
298 
299     /* add the k-th row into il and jl */
300     if (nzk > 1) {
301       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
302       jl[k] = jl[i]; jl[i] = k;
303       il[k] = ui[k] + 1;
304     }
305     ui_ptr[k] = current_space->array;
306 
307     current_space->array           += nzk;
308     current_space->local_used      += nzk;
309     current_space->local_remaining -= nzk;
310 
311     ui[k+1] = ui[k] + nzk;
312   }
313 
314   PetscCall(ISRestoreIndices(perm,&rip));
315   PetscCall(PetscFree4(ui_ptr,il,jl,cols));
316 
317   /* destroy list of free space and other temporary array(s) */
318   PetscCall(PetscMalloc1(ui[mbs]+1,&uj));
319   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space,uj,mbs,ui,udiag)); /* store matrix factor */
320   PetscCall(PetscLLDestroy(lnk,lnkbt));
321 
322   /* put together the new matrix in MATSEQSBAIJ format */
323   PetscCall(MatSeqSBAIJSetPreallocation(fact,bs,MAT_SKIP_ALLOCATION,NULL));
324 
325   b               = (Mat_SeqSBAIJ*)fact->data;
326   b->singlemalloc = PETSC_FALSE;
327   b->free_a       = PETSC_TRUE;
328   b->free_ij      = PETSC_TRUE;
329 
330   PetscCall(PetscMalloc1(ui[mbs]+1,&b->a));
331 
332   b->j         = uj;
333   b->i         = ui;
334   b->diag      = udiag;
335   b->free_diag = PETSC_TRUE;
336   b->ilen      = NULL;
337   b->imax      = NULL;
338   b->row       = perm;
339   b->icol      = perm;
340 
341   PetscCall(PetscObjectReference((PetscObject)perm));
342   PetscCall(PetscObjectReference((PetscObject)perm));
343 
344   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
345 
346   PetscCall(PetscMalloc1(mbs+1,&b->solve_work));
347   PetscCall(PetscLogObjectMemory((PetscObject)fact,ui[mbs]*(sizeof(PetscInt)+sizeof(MatScalar))));
348 
349   b->maxnz = b->nz = ui[mbs];
350 
351   fact->info.factor_mallocs   = reallocs;
352   fact->info.fill_ratio_given = fill;
353   if (ai[mbs] != 0) {
354     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs])/ai[mbs];
355   } else {
356     fact->info.fill_ratio_needed = 0.0;
357   }
358 #if defined(PETSC_USE_INFO)
359   if (ai[mbs] != 0) {
360     PetscReal af = fact->info.fill_ratio_needed;
361     PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af));
362     PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af));
363     PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af));
364   } else {
365     PetscCall(PetscInfo(A,"Empty matrix\n"));
366   }
367 #endif
368   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
369   PetscFunctionReturn(0);
370 }
371 
372 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
373 {
374   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data;
375   Mat_SeqSBAIJ       *b;
376   PetscBool          perm_identity,missing;
377   PetscReal          fill = info->fill;
378   const PetscInt     *rip,*ai,*aj;
379   PetscInt           i,mbs=a->mbs,bs=A->rmap->bs,reallocs=0,prow,d;
380   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
381   PetscInt           nlnk,*lnk,ncols,*cols,*uj,**ui_ptr,*uj_ptr;
382   PetscFreeSpaceList free_space=NULL,current_space=NULL;
383   PetscBT            lnkbt;
384 
385   PetscFunctionBegin;
386   PetscCall(MatMissingDiagonal(A,&missing,&d));
387   PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,d);
388 
389   /*
390    This code originally uses Modified Sparse Row (MSR) storage
391    (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choise!
392    Then it is rewritten so the factor B takes seqsbaij format. However the associated
393    MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity,
394    thus the original code in MSR format is still used for these cases.
395    The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever
396    MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
397   */
398   if (bs > 1) {
399     PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact,A,perm,info));
400     PetscFunctionReturn(0);
401   }
402 
403   /* check whether perm is the identity mapping */
404   PetscCall(ISIdentity(perm,&perm_identity));
405   PetscCheck(perm_identity,PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format");
406   a->permute = PETSC_FALSE;
407   ai = a->i; aj = a->j;
408   PetscCall(ISGetIndices(perm,&rip));
409 
410   /* initialization */
411   PetscCall(PetscMalloc1(mbs+1,&ui));
412   ui[0] = 0;
413 
414   /* jl: linked list for storing indices of the pivot rows
415      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
416   PetscCall(PetscMalloc4(mbs,&ui_ptr,mbs,&il,mbs,&jl,mbs,&cols));
417   for (i=0; i<mbs; i++) {
418     jl[i] = mbs; il[i] = 0;
419   }
420 
421   /* create and initialize a linked list for storing column indices of the active row k */
422   nlnk = mbs + 1;
423   PetscCall(PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt));
424 
425   /* initial FreeSpace size is fill*(ai[mbs]+1) */
426   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[mbs]+1),&free_space));
427   current_space = free_space;
428 
429   for (k=0; k<mbs; k++) {  /* for each active row k */
430     /* initialize lnk by the column indices of row rip[k] of A */
431     nzk   = 0;
432     ncols = ai[rip[k]+1] - ai[rip[k]];
433     for (j=0; j<ncols; j++) {
434       i       = *(aj + ai[rip[k]] + j);
435       cols[j] = rip[i];
436     }
437     PetscCall(PetscLLAdd(ncols,cols,mbs,&nlnk,lnk,lnkbt));
438     nzk += nlnk;
439 
440     /* update lnk by computing fill-in for each pivot row to be merged in */
441     prow = jl[k]; /* 1st pivot row */
442 
443     while (prow < k) {
444       nextprow = jl[prow];
445       /* merge prow into k-th row */
446       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
447       jmax   = ui[prow+1];
448       ncols  = jmax-jmin;
449       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
450       PetscCall(PetscLLAddSorted(ncols,uj_ptr,mbs,&nlnk,lnk,lnkbt));
451       nzk   += nlnk;
452 
453       /* update il and jl for prow */
454       if (jmin < jmax) {
455         il[prow] = jmin;
456 
457         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
458       }
459       prow = nextprow;
460     }
461 
462     /* if free space is not available, make more free space */
463     if (current_space->local_remaining<nzk) {
464       i = mbs - k + 1;          /* num of unfactored rows */
465       i = PetscMin(PetscIntMultTruncate(i,nzk), PetscIntMultTruncate(i,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
466       PetscCall(PetscFreeSpaceGet(i,&current_space));
467       reallocs++;
468     }
469 
470     /* copy data into free space, then initialize lnk */
471     PetscCall(PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt));
472 
473     /* add the k-th row into il and jl */
474     if (nzk-1 > 0) {
475       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
476       jl[k] = jl[i]; jl[i] = k;
477       il[k] = ui[k] + 1;
478     }
479     ui_ptr[k] = current_space->array;
480 
481     current_space->array           += nzk;
482     current_space->local_used      += nzk;
483     current_space->local_remaining -= nzk;
484 
485     ui[k+1] = ui[k] + nzk;
486   }
487 
488   PetscCall(ISRestoreIndices(perm,&rip));
489   PetscCall(PetscFree4(ui_ptr,il,jl,cols));
490 
491   /* destroy list of free space and other temporary array(s) */
492   PetscCall(PetscMalloc1(ui[mbs]+1,&uj));
493   PetscCall(PetscFreeSpaceContiguous(&free_space,uj));
494   PetscCall(PetscLLDestroy(lnk,lnkbt));
495 
496   /* put together the new matrix in MATSEQSBAIJ format */
497   PetscCall(MatSeqSBAIJSetPreallocation(fact,bs,MAT_SKIP_ALLOCATION,NULL));
498 
499   b               = (Mat_SeqSBAIJ*)fact->data;
500   b->singlemalloc = PETSC_FALSE;
501   b->free_a       = PETSC_TRUE;
502   b->free_ij      = PETSC_TRUE;
503 
504   PetscCall(PetscMalloc1(ui[mbs]+1,&b->a));
505 
506   b->j    = uj;
507   b->i    = ui;
508   b->diag = NULL;
509   b->ilen = NULL;
510   b->imax = NULL;
511   b->row  = perm;
512 
513   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
514 
515   PetscCall(PetscObjectReference((PetscObject)perm));
516   b->icol  = perm;
517   PetscCall(PetscObjectReference((PetscObject)perm));
518   PetscCall(PetscMalloc1(mbs+1,&b->solve_work));
519   PetscCall(PetscLogObjectMemory((PetscObject)fact,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))));
520   b->maxnz = b->nz = ui[mbs];
521 
522   fact->info.factor_mallocs   = reallocs;
523   fact->info.fill_ratio_given = fill;
524   if (ai[mbs] != 0) {
525     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs])/ai[mbs];
526   } else {
527     fact->info.fill_ratio_needed = 0.0;
528   }
529 #if defined(PETSC_USE_INFO)
530   if (ai[mbs] != 0) {
531     PetscReal af = fact->info.fill_ratio_needed;
532     PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af));
533     PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af));
534     PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af));
535   } else {
536     PetscCall(PetscInfo(A,"Empty matrix\n"));
537   }
538 #endif
539   PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(fact,perm_identity));
540   PetscFunctionReturn(0);
541 }
542 
543 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C,Mat A,const MatFactorInfo *info)
544 {
545   Mat_SeqSBAIJ   *a   = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)C->data;
546   IS             perm = b->row;
547   const PetscInt *ai,*aj,*perm_ptr,mbs=a->mbs,*bi=b->i,*bj=b->j;
548   PetscInt       i,j;
549   PetscInt       *a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
550   PetscInt       bs  =A->rmap->bs,bs2 = a->bs2;
551   MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
552   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
553   MatScalar      *work;
554   PetscInt       *pivots;
555   PetscBool      allowzeropivot,zeropivotdetected;
556 
557   PetscFunctionBegin;
558   /* initialization */
559   PetscCall(PetscCalloc1(bs2*mbs,&rtmp));
560   PetscCall(PetscMalloc2(mbs,&il,mbs,&jl));
561   allowzeropivot = PetscNot(A->erroriffailure);
562 
563   il[0] = 0;
564   for (i=0; i<mbs; i++) jl[i] = mbs;
565 
566   PetscCall(PetscMalloc3(bs2,&dk,bs2,&uik,bs,&work));
567   PetscCall(PetscMalloc1(bs,&pivots));
568 
569   PetscCall(ISGetIndices(perm,&perm_ptr));
570 
571   /* check permutation */
572   if (!a->permute) {
573     ai = a->i; aj = a->j; aa = a->a;
574   } else {
575     ai   = a->inew; aj = a->jnew;
576     PetscCall(PetscMalloc1(bs2*ai[mbs],&aa));
577     PetscCall(PetscArraycpy(aa,a->a,bs2*ai[mbs]));
578     PetscCall(PetscMalloc1(ai[mbs],&a2anew));
579     PetscCall(PetscArraycpy(a2anew,a->a2anew,ai[mbs]));
580 
581     for (i=0; i<mbs; i++) {
582       jmin = ai[i]; jmax = ai[i+1];
583       for (j=jmin; j<jmax; j++) {
584         while (a2anew[j] != j) {
585           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
586           for (k1=0; k1<bs2; k1++) {
587             dk[k1]       = aa[k*bs2+k1];
588             aa[k*bs2+k1] = aa[j*bs2+k1];
589             aa[j*bs2+k1] = dk[k1];
590           }
591         }
592         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
593         if (i > aj[j]) {
594           ap = aa + j*bs2;                     /* ptr to the beginning of j-th block of aa */
595           for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
596           for (k=0; k<bs; k++) {               /* j-th block of aa <- dk^T */
597             for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1];
598           }
599         }
600       }
601     }
602     PetscCall(PetscFree(a2anew));
603   }
604 
605   /* for each row k */
606   for (k = 0; k<mbs; k++) {
607 
608     /*initialize k-th row with elements nonzero in row perm(k) of A */
609     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
610 
611     ap = aa + jmin*bs2;
612     for (j = jmin; j < jmax; j++) {
613       vj       = perm_ptr[aj[j]];   /* block col. index */
614       rtmp_ptr = rtmp + vj*bs2;
615       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
616     }
617 
618     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
619     PetscCall(PetscArraycpy(dk,rtmp+k*bs2,bs2));
620     i    = jl[k]; /* first row to be added to k_th row  */
621 
622     while (i < k) {
623       nexti = jl[i]; /* next row to be added to k_th row */
624 
625       /* compute multiplier */
626       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
627 
628       /* uik = -inv(Di)*U_bar(i,k) */
629       diag = ba + i*bs2;
630       u    = ba + ili*bs2;
631       PetscCall(PetscArrayzero(uik,bs2));
632       PetscKernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
633 
634       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
635       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
636       PetscCall(PetscLogFlops(4.0*bs*bs2));
637 
638       /* update -U(i,k) */
639       PetscCall(PetscArraycpy(ba+ili*bs2,uik,bs2));
640 
641       /* add multiple of row i to k-th row ... */
642       jmin = ili + 1; jmax = bi[i+1];
643       if (jmin < jmax) {
644         for (j=jmin; j<jmax; j++) {
645           /* rtmp += -U(i,k)^T * U_bar(i,j) */
646           rtmp_ptr = rtmp + bj[j]*bs2;
647           u        = ba + j*bs2;
648           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
649         }
650         PetscCall(PetscLogFlops(2.0*bs*bs2*(jmax-jmin)));
651 
652         /* ... add i to row list for next nonzero entry */
653         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
654         j     = bj[jmin];
655         jl[i] = jl[j]; jl[j] = i; /* update jl */
656       }
657       i = nexti;
658     }
659 
660     /* save nonzero entries in k-th row of U ... */
661 
662     /* invert diagonal block */
663     diag = ba+k*bs2;
664     PetscCall(PetscArraycpy(diag,dk,bs2));
665 
666     PetscCall(PetscKernel_A_gets_inverse_A(bs,diag,pivots,work,allowzeropivot,&zeropivotdetected));
667     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
668 
669     jmin = bi[k]; jmax = bi[k+1];
670     if (jmin < jmax) {
671       for (j=jmin; j<jmax; j++) {
672         vj       = bj[j];      /* block col. index of U */
673         u        = ba + j*bs2;
674         rtmp_ptr = rtmp + vj*bs2;
675         for (k1=0; k1<bs2; k1++) {
676           *u++        = *rtmp_ptr;
677           *rtmp_ptr++ = 0.0;
678         }
679       }
680 
681       /* ... add k to row list for first nonzero entry in k-th row */
682       il[k] = jmin;
683       i     = bj[jmin];
684       jl[k] = jl[i]; jl[i] = k;
685     }
686   }
687 
688   PetscCall(PetscFree(rtmp));
689   PetscCall(PetscFree2(il,jl));
690   PetscCall(PetscFree3(dk,uik,work));
691   PetscCall(PetscFree(pivots));
692   if (a->permute) {
693     PetscCall(PetscFree(aa));
694   }
695 
696   PetscCall(ISRestoreIndices(perm,&perm_ptr));
697 
698   C->ops->solve          = MatSolve_SeqSBAIJ_N_inplace;
699   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace;
700   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_inplace;
701   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_inplace;
702 
703   C->assembled    = PETSC_TRUE;
704   C->preallocated = PETSC_TRUE;
705 
706   PetscCall(PetscLogFlops(1.3333*bs*bs2*b->mbs)); /* from inverting diagonal blocks */
707   PetscFunctionReturn(0);
708 }
709 
710 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info)
711 {
712   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)C->data;
713   PetscInt       i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
714   PetscInt       *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
715   PetscInt       bs  =A->rmap->bs,bs2 = a->bs2;
716   MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
717   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
718   MatScalar      *work;
719   PetscInt       *pivots;
720   PetscBool      allowzeropivot,zeropivotdetected;
721 
722   PetscFunctionBegin;
723   PetscCall(PetscCalloc1(bs2*mbs,&rtmp));
724   PetscCall(PetscMalloc2(mbs,&il,mbs,&jl));
725   il[0] = 0;
726   for (i=0; i<mbs; i++) jl[i] = mbs;
727 
728   PetscCall(PetscMalloc3(bs2,&dk,bs2,&uik,bs,&work));
729   PetscCall(PetscMalloc1(bs,&pivots));
730   allowzeropivot = PetscNot(A->erroriffailure);
731 
732   ai = a->i; aj = a->j; aa = a->a;
733 
734   /* for each row k */
735   for (k = 0; k<mbs; k++) {
736 
737     /*initialize k-th row with elements nonzero in row k of A */
738     jmin = ai[k]; jmax = ai[k+1];
739     ap   = aa + jmin*bs2;
740     for (j = jmin; j < jmax; j++) {
741       vj       = aj[j];   /* block col. index */
742       rtmp_ptr = rtmp + vj*bs2;
743       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
744     }
745 
746     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
747     PetscCall(PetscArraycpy(dk,rtmp+k*bs2,bs2));
748     i    = jl[k]; /* first row to be added to k_th row  */
749 
750     while (i < k) {
751       nexti = jl[i]; /* next row to be added to k_th row */
752 
753       /* compute multiplier */
754       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
755 
756       /* uik = -inv(Di)*U_bar(i,k) */
757       diag = ba + i*bs2;
758       u    = ba + ili*bs2;
759       PetscCall(PetscArrayzero(uik,bs2));
760       PetscKernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
761 
762       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
763       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
764       PetscCall(PetscLogFlops(2.0*bs*bs2));
765 
766       /* update -U(i,k) */
767       PetscCall(PetscArraycpy(ba+ili*bs2,uik,bs2));
768 
769       /* add multiple of row i to k-th row ... */
770       jmin = ili + 1; jmax = bi[i+1];
771       if (jmin < jmax) {
772         for (j=jmin; j<jmax; j++) {
773           /* rtmp += -U(i,k)^T * U_bar(i,j) */
774           rtmp_ptr = rtmp + bj[j]*bs2;
775           u        = ba + j*bs2;
776           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
777         }
778         PetscCall(PetscLogFlops(2.0*bs*bs2*(jmax-jmin)));
779 
780         /* ... add i to row list for next nonzero entry */
781         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
782         j     = bj[jmin];
783         jl[i] = jl[j]; jl[j] = i; /* update jl */
784       }
785       i = nexti;
786     }
787 
788     /* save nonzero entries in k-th row of U ... */
789 
790     /* invert diagonal block */
791     diag = ba+k*bs2;
792     PetscCall(PetscArraycpy(diag,dk,bs2));
793 
794     PetscCall(PetscKernel_A_gets_inverse_A(bs,diag,pivots,work,allowzeropivot,&zeropivotdetected));
795     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
796 
797     jmin = bi[k]; jmax = bi[k+1];
798     if (jmin < jmax) {
799       for (j=jmin; j<jmax; j++) {
800         vj       = bj[j];      /* block col. index of U */
801         u        = ba + j*bs2;
802         rtmp_ptr = rtmp + vj*bs2;
803         for (k1=0; k1<bs2; k1++) {
804           *u++        = *rtmp_ptr;
805           *rtmp_ptr++ = 0.0;
806         }
807       }
808 
809       /* ... add k to row list for first nonzero entry in k-th row */
810       il[k] = jmin;
811       i     = bj[jmin];
812       jl[k] = jl[i]; jl[i] = k;
813     }
814   }
815 
816   PetscCall(PetscFree(rtmp));
817   PetscCall(PetscFree2(il,jl));
818   PetscCall(PetscFree3(dk,uik,work));
819   PetscCall(PetscFree(pivots));
820 
821   C->ops->solve          = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
822   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
823   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
824   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
825   C->assembled           = PETSC_TRUE;
826   C->preallocated        = PETSC_TRUE;
827 
828   PetscCall(PetscLogFlops(1.3333*bs*bs2*b->mbs)); /* from inverting diagonal blocks */
829   PetscFunctionReturn(0);
830 }
831 
832 /*
833     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
834     Version for blocks 2 by 2.
835 */
836 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C,Mat A,const MatFactorInfo *info)
837 {
838   Mat_SeqSBAIJ   *a   = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)C->data;
839   IS             perm = b->row;
840   const PetscInt *ai,*aj,*perm_ptr;
841   PetscInt       i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
842   PetscInt       *a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
843   MatScalar      *ba = b->a,*aa,*ap;
844   MatScalar      *u,*diag,*rtmp,*rtmp_ptr,dk[4],uik[4];
845   PetscReal      shift = info->shiftamount;
846   PetscBool      allowzeropivot,zeropivotdetected;
847 
848   PetscFunctionBegin;
849   allowzeropivot = PetscNot(A->erroriffailure);
850 
851   /* initialization */
852   /* il and jl record the first nonzero element in each row of the accessing
853      window U(0:k, k:mbs-1).
854      jl:    list of rows to be added to uneliminated rows
855             i>= k: jl(i) is the first row to be added to row i
856             i<  k: jl(i) is the row following row i in some list of rows
857             jl(i) = mbs indicates the end of a list
858      il(i): points to the first nonzero element in columns k,...,mbs-1 of
859             row i of U */
860   PetscCall(PetscCalloc1(4*mbs,&rtmp));
861   PetscCall(PetscMalloc2(mbs,&il,mbs,&jl));
862   il[0] = 0;
863   for (i=0; i<mbs; i++) jl[i] = mbs;
864 
865   PetscCall(ISGetIndices(perm,&perm_ptr));
866 
867   /* check permutation */
868   if (!a->permute) {
869     ai = a->i; aj = a->j; aa = a->a;
870   } else {
871     ai   = a->inew; aj = a->jnew;
872     PetscCall(PetscMalloc1(4*ai[mbs],&aa));
873     PetscCall(PetscArraycpy(aa,a->a,4*ai[mbs]));
874     PetscCall(PetscMalloc1(ai[mbs],&a2anew));
875     PetscCall(PetscArraycpy(a2anew,a->a2anew,ai[mbs]));
876 
877     for (i=0; i<mbs; i++) {
878       jmin = ai[i]; jmax = ai[i+1];
879       for (j=jmin; j<jmax; j++) {
880         while (a2anew[j] != j) {
881           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
882           for (k1=0; k1<4; k1++) {
883             dk[k1]     = aa[k*4+k1];
884             aa[k*4+k1] = aa[j*4+k1];
885             aa[j*4+k1] = dk[k1];
886           }
887         }
888         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
889         if (i > aj[j]) {
890           ap    = aa + j*4;  /* ptr to the beginning of the block */
891           dk[1] = ap[1];     /* swap ap[1] and ap[2] */
892           ap[1] = ap[2];
893           ap[2] = dk[1];
894         }
895       }
896     }
897     PetscCall(PetscFree(a2anew));
898   }
899 
900   /* for each row k */
901   for (k = 0; k<mbs; k++) {
902 
903     /*initialize k-th row with elements nonzero in row perm(k) of A */
904     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
905     ap   = aa + jmin*4;
906     for (j = jmin; j < jmax; j++) {
907       vj       = perm_ptr[aj[j]];   /* block col. index */
908       rtmp_ptr = rtmp + vj*4;
909       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
910     }
911 
912     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
913     PetscCall(PetscArraycpy(dk,rtmp+k*4,4));
914     i    = jl[k]; /* first row to be added to k_th row  */
915 
916     while (i < k) {
917       nexti = jl[i]; /* next row to be added to k_th row */
918 
919       /* compute multiplier */
920       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
921 
922       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
923       diag   = ba + i*4;
924       u      = ba + ili*4;
925       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
926       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
927       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
928       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
929 
930       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
931       dk[0] += uik[0]*u[0] + uik[1]*u[1];
932       dk[1] += uik[2]*u[0] + uik[3]*u[1];
933       dk[2] += uik[0]*u[2] + uik[1]*u[3];
934       dk[3] += uik[2]*u[2] + uik[3]*u[3];
935 
936       PetscCall(PetscLogFlops(16.0*2.0));
937 
938       /* update -U(i,k): ba[ili] = uik */
939       PetscCall(PetscArraycpy(ba+ili*4,uik,4));
940 
941       /* add multiple of row i to k-th row ... */
942       jmin = ili + 1; jmax = bi[i+1];
943       if (jmin < jmax) {
944         for (j=jmin; j<jmax; j++) {
945           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
946           rtmp_ptr     = rtmp + bj[j]*4;
947           u            = ba + j*4;
948           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
949           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
950           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
951           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
952         }
953         PetscCall(PetscLogFlops(16.0*(jmax-jmin)));
954 
955         /* ... add i to row list for next nonzero entry */
956         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
957         j     = bj[jmin];
958         jl[i] = jl[j]; jl[j] = i; /* update jl */
959       }
960       i = nexti;
961     }
962 
963     /* save nonzero entries in k-th row of U ... */
964 
965     /* invert diagonal block */
966     diag = ba+k*4;
967     PetscCall(PetscArraycpy(diag,dk,4));
968     PetscCall(PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected));
969     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
970 
971     jmin = bi[k]; jmax = bi[k+1];
972     if (jmin < jmax) {
973       for (j=jmin; j<jmax; j++) {
974         vj       = bj[j];      /* block col. index of U */
975         u        = ba + j*4;
976         rtmp_ptr = rtmp + vj*4;
977         for (k1=0; k1<4; k1++) {
978           *u++        = *rtmp_ptr;
979           *rtmp_ptr++ = 0.0;
980         }
981       }
982 
983       /* ... add k to row list for first nonzero entry in k-th row */
984       il[k] = jmin;
985       i     = bj[jmin];
986       jl[k] = jl[i]; jl[i] = k;
987     }
988   }
989 
990   PetscCall(PetscFree(rtmp));
991   PetscCall(PetscFree2(il,jl));
992   if (a->permute) {
993     PetscCall(PetscFree(aa));
994   }
995   PetscCall(ISRestoreIndices(perm,&perm_ptr));
996 
997   C->ops->solve          = MatSolve_SeqSBAIJ_2_inplace;
998   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace;
999   C->assembled           = PETSC_TRUE;
1000   C->preallocated        = PETSC_TRUE;
1001 
1002   PetscCall(PetscLogFlops(1.3333*8*b->mbs)); /* from inverting diagonal blocks */
1003   PetscFunctionReturn(0);
1004 }
1005 
1006 /*
1007       Version for when blocks are 2 by 2 Using natural ordering
1008 */
1009 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info)
1010 {
1011   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)C->data;
1012   PetscInt       i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
1013   PetscInt       *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
1014   MatScalar      *ba = b->a,*aa,*ap,dk[8],uik[8];
1015   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
1016   PetscReal      shift = info->shiftamount;
1017   PetscBool      allowzeropivot,zeropivotdetected;
1018 
1019   PetscFunctionBegin;
1020   allowzeropivot = PetscNot(A->erroriffailure);
1021 
1022   /* initialization */
1023   /* il and jl record the first nonzero element in each row of the accessing
1024      window U(0:k, k:mbs-1).
1025      jl:    list of rows to be added to uneliminated rows
1026             i>= k: jl(i) is the first row to be added to row i
1027             i<  k: jl(i) is the row following row i in some list of rows
1028             jl(i) = mbs indicates the end of a list
1029      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1030             row i of U */
1031   PetscCall(PetscCalloc1(4*mbs,&rtmp));
1032   PetscCall(PetscMalloc2(mbs,&il,mbs,&jl));
1033   il[0] = 0;
1034   for (i=0; i<mbs; i++) jl[i] = mbs;
1035 
1036   ai = a->i; aj = a->j; aa = a->a;
1037 
1038   /* for each row k */
1039   for (k = 0; k<mbs; k++) {
1040 
1041     /*initialize k-th row with elements nonzero in row k of A */
1042     jmin = ai[k]; jmax = ai[k+1];
1043     ap   = aa + jmin*4;
1044     for (j = jmin; j < jmax; j++) {
1045       vj       = aj[j];   /* block col. index */
1046       rtmp_ptr = rtmp + vj*4;
1047       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
1048     }
1049 
1050     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1051     PetscCall(PetscArraycpy(dk,rtmp+k*4,4));
1052     i    = jl[k]; /* first row to be added to k_th row  */
1053 
1054     while (i < k) {
1055       nexti = jl[i]; /* next row to be added to k_th row */
1056 
1057       /* compute multiplier */
1058       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1059 
1060       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1061       diag   = ba + i*4;
1062       u      = ba + ili*4;
1063       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
1064       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
1065       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
1066       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
1067 
1068       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1069       dk[0] += uik[0]*u[0] + uik[1]*u[1];
1070       dk[1] += uik[2]*u[0] + uik[3]*u[1];
1071       dk[2] += uik[0]*u[2] + uik[1]*u[3];
1072       dk[3] += uik[2]*u[2] + uik[3]*u[3];
1073 
1074       PetscCall(PetscLogFlops(16.0*2.0));
1075 
1076       /* update -U(i,k): ba[ili] = uik */
1077       PetscCall(PetscArraycpy(ba+ili*4,uik,4));
1078 
1079       /* add multiple of row i to k-th row ... */
1080       jmin = ili + 1; jmax = bi[i+1];
1081       if (jmin < jmax) {
1082         for (j=jmin; j<jmax; j++) {
1083           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1084           rtmp_ptr     = rtmp + bj[j]*4;
1085           u            = ba + j*4;
1086           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
1087           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
1088           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
1089           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
1090         }
1091         PetscCall(PetscLogFlops(16.0*(jmax-jmin)));
1092 
1093         /* ... add i to row list for next nonzero entry */
1094         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1095         j     = bj[jmin];
1096         jl[i] = jl[j]; jl[j] = i; /* update jl */
1097       }
1098       i = nexti;
1099     }
1100 
1101     /* save nonzero entries in k-th row of U ... */
1102 
1103     /* invert diagonal block */
1104     diag = ba+k*4;
1105     PetscCall(PetscArraycpy(diag,dk,4));
1106     PetscCall(PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected));
1107     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1108 
1109     jmin = bi[k]; jmax = bi[k+1];
1110     if (jmin < jmax) {
1111       for (j=jmin; j<jmax; j++) {
1112         vj       = bj[j];      /* block col. index of U */
1113         u        = ba + j*4;
1114         rtmp_ptr = rtmp + vj*4;
1115         for (k1=0; k1<4; k1++) {
1116           *u++        = *rtmp_ptr;
1117           *rtmp_ptr++ = 0.0;
1118         }
1119       }
1120 
1121       /* ... add k to row list for first nonzero entry in k-th row */
1122       il[k] = jmin;
1123       i     = bj[jmin];
1124       jl[k] = jl[i]; jl[i] = k;
1125     }
1126   }
1127 
1128   PetscCall(PetscFree(rtmp));
1129   PetscCall(PetscFree2(il,jl));
1130 
1131   C->ops->solve          = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1132   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1133   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1134   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1135   C->assembled           = PETSC_TRUE;
1136   C->preallocated        = PETSC_TRUE;
1137 
1138   PetscCall(PetscLogFlops(1.3333*8*b->mbs)); /* from inverting diagonal blocks */
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 /*
1143     Numeric U^T*D*U factorization for SBAIJ format.
1144     Version for blocks are 1 by 1.
1145 */
1146 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C,Mat A,const MatFactorInfo *info)
1147 {
1148   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ*)C->data;
1149   IS             ip=b->row;
1150   const PetscInt *ai,*aj,*rip;
1151   PetscInt       *a2anew,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j,*bcol;
1152   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1153   MatScalar      *rtmp,*ba=b->a,*bval,*aa,dk,uikdi;
1154   PetscReal      rs;
1155   FactorShiftCtx sctx;
1156 
1157   PetscFunctionBegin;
1158   /* MatPivotSetUp(): initialize shift context sctx */
1159   PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx)));
1160 
1161   PetscCall(ISGetIndices(ip,&rip));
1162   if (!a->permute) {
1163     ai = a->i; aj = a->j; aa = a->a;
1164   } else {
1165     ai     = a->inew; aj = a->jnew;
1166     nz     = ai[mbs];
1167     PetscCall(PetscMalloc1(nz,&aa));
1168     a2anew = a->a2anew;
1169     bval   = a->a;
1170     for (j=0; j<nz; j++) {
1171       aa[a2anew[j]] = *(bval++);
1172     }
1173   }
1174 
1175   /* initialization */
1176   /* il and jl record the first nonzero element in each row of the accessing
1177      window U(0:k, k:mbs-1).
1178      jl:    list of rows to be added to uneliminated rows
1179             i>= k: jl(i) is the first row to be added to row i
1180             i<  k: jl(i) is the row following row i in some list of rows
1181             jl(i) = mbs indicates the end of a list
1182      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1183             row i of U */
1184   PetscCall(PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl));
1185 
1186   do {
1187     sctx.newshift = PETSC_FALSE;
1188     il[0] = 0;
1189     for (i=0; i<mbs; i++) {
1190       rtmp[i] = 0.0; jl[i] = mbs;
1191     }
1192 
1193     for (k = 0; k<mbs; k++) {
1194       /*initialize k-th row by the perm[k]-th row of A */
1195       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1196       bval = ba + bi[k];
1197       for (j = jmin; j < jmax; j++) {
1198         col       = rip[aj[j]];
1199         rtmp[col] = aa[j];
1200         *bval++   = 0.0; /* for in-place factorization */
1201       }
1202 
1203       /* shift the diagonal of the matrix */
1204       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1205 
1206       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1207       dk = rtmp[k];
1208       i  = jl[k]; /* first row to be added to k_th row  */
1209 
1210       while (i < k) {
1211         nexti = jl[i]; /* next row to be added to k_th row */
1212 
1213         /* compute multiplier, update diag(k) and U(i,k) */
1214         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1215         uikdi   = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
1216         dk     += uikdi*ba[ili];
1217         ba[ili] = uikdi; /* -U(i,k) */
1218 
1219         /* add multiple of row i to k-th row */
1220         jmin = ili + 1; jmax = bi[i+1];
1221         if (jmin < jmax) {
1222           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1223           PetscCall(PetscLogFlops(2.0*(jmax-jmin)));
1224 
1225           /* update il and jl for row i */
1226           il[i] = jmin;
1227           j     = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1228         }
1229         i = nexti;
1230       }
1231 
1232       /* shift the diagonals when zero pivot is detected */
1233       /* compute rs=sum of abs(off-diagonal) */
1234       rs   = 0.0;
1235       jmin = bi[k]+1;
1236       nz   = bi[k+1] - jmin;
1237       if (nz) {
1238         bcol = bj + jmin;
1239         while (nz--) {
1240           rs += PetscAbsScalar(rtmp[*bcol]);
1241           bcol++;
1242         }
1243       }
1244 
1245       sctx.rs = rs;
1246       sctx.pv = dk;
1247       PetscCall(MatPivotCheck(C,A,info,&sctx,k));
1248       if (sctx.newshift) break;    /* sctx.shift_amount is updated */
1249       dk = sctx.pv;
1250 
1251       /* copy data into U(k,:) */
1252       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1253       jmin      = bi[k]+1; jmax = bi[k+1];
1254       if (jmin < jmax) {
1255         for (j=jmin; j<jmax; j++) {
1256           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1257         }
1258         /* add the k-th row into il and jl */
1259         il[k] = jmin;
1260         i     = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1261       }
1262     }
1263   } while (sctx.newshift);
1264   PetscCall(PetscFree3(rtmp,il,jl));
1265   if (a->permute) PetscCall(PetscFree(aa));
1266 
1267   PetscCall(ISRestoreIndices(ip,&rip));
1268 
1269   C->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
1270   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1271   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
1272   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
1273   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
1274   C->assembled           = PETSC_TRUE;
1275   C->preallocated        = PETSC_TRUE;
1276 
1277   PetscCall(PetscLogFlops(C->rmap->N));
1278   if (sctx.nshift) {
1279     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1280       PetscCall(PetscInfo(A,"number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount));
1281     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1282       PetscCall(PetscInfo(A,"number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount));
1283     }
1284   }
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 /*
1289   Version for when blocks are 1 by 1 Using natural ordering under new datastructure
1290   Modified from MatCholeskyFactorNumeric_SeqAIJ()
1291 */
1292 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B,Mat A,const MatFactorInfo *info)
1293 {
1294   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1295   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)B->data;
1296   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
1297   PetscInt       *ai=a->i,*aj=a->j,*ajtmp;
1298   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
1299   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1300   FactorShiftCtx sctx;
1301   PetscReal      rs;
1302   MatScalar      d,*v;
1303 
1304   PetscFunctionBegin;
1305   PetscCall(PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r));
1306 
1307   /* MatPivotSetUp(): initialize shift context sctx */
1308   PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx)));
1309 
1310   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1311     sctx.shift_top = info->zeropivot;
1312 
1313     PetscCall(PetscArrayzero(rtmp,mbs));
1314 
1315     for (i=0; i<mbs; i++) {
1316       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1317       d        = (aa)[a->diag[i]];
1318       rtmp[i] += -PetscRealPart(d);  /* diagonal entry */
1319       ajtmp    = aj + ai[i] + 1;     /* exclude diagonal */
1320       v        = aa + ai[i] + 1;
1321       nz       = ai[i+1] - ai[i] - 1;
1322       for (j=0; j<nz; j++) {
1323         rtmp[i]        += PetscAbsScalar(v[j]);
1324         rtmp[ajtmp[j]] += PetscAbsScalar(v[j]);
1325       }
1326       if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]);
1327     }
1328     sctx.shift_top *= 1.1;
1329     sctx.nshift_max = 5;
1330     sctx.shift_lo   = 0.;
1331     sctx.shift_hi   = 1.;
1332   }
1333 
1334   /* allocate working arrays
1335      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1336      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1337   */
1338   do {
1339     sctx.newshift = PETSC_FALSE;
1340 
1341     for (i=0; i<mbs; i++) c2r[i] = mbs;
1342     if (mbs) il[0] = 0;
1343 
1344     for (k = 0; k<mbs; k++) {
1345       /* zero rtmp */
1346       nz    = bi[k+1] - bi[k];
1347       bjtmp = bj + bi[k];
1348       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
1349 
1350       /* load in initial unfactored row */
1351       bval = ba + bi[k];
1352       jmin = ai[k]; jmax = ai[k+1];
1353       for (j = jmin; j < jmax; j++) {
1354         col       = aj[j];
1355         rtmp[col] = aa[j];
1356         *bval++   = 0.0; /* for in-place factorization */
1357       }
1358       /* shift the diagonal of the matrix: ZeropivotApply() */
1359       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */
1360 
1361       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1362       dk = rtmp[k];
1363       i  = c2r[k]; /* first row to be added to k_th row  */
1364 
1365       while (i < k) {
1366         nexti = c2r[i]; /* next row to be added to k_th row */
1367 
1368         /* compute multiplier, update diag(k) and U(i,k) */
1369         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1370         uikdi   = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
1371         dk     += uikdi*ba[ili]; /* update diag[k] */
1372         ba[ili] = uikdi; /* -U(i,k) */
1373 
1374         /* add multiple of row i to k-th row */
1375         jmin = ili + 1; jmax = bi[i+1];
1376         if (jmin < jmax) {
1377           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1378           /* update il and c2r for row i */
1379           il[i] = jmin;
1380           j     = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
1381         }
1382         i = nexti;
1383       }
1384 
1385       /* copy data into U(k,:) */
1386       rs   = 0.0;
1387       jmin = bi[k]; jmax = bi[k+1]-1;
1388       if (jmin < jmax) {
1389         for (j=jmin; j<jmax; j++) {
1390           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
1391         }
1392         /* add the k-th row into il and c2r */
1393         il[k] = jmin;
1394         i     = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
1395       }
1396 
1397       sctx.rs = rs;
1398       sctx.pv = dk;
1399       PetscCall(MatPivotCheck(B,A,info,&sctx,k));
1400       if (sctx.newshift) break;
1401       dk = sctx.pv;
1402 
1403       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
1404     }
1405   } while (sctx.newshift);
1406 
1407   PetscCall(PetscFree3(rtmp,il,c2r));
1408 
1409   B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1410   B->ops->solves         = MatSolves_SeqSBAIJ_1;
1411   B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1412   B->ops->matsolve       = MatMatSolve_SeqSBAIJ_1_NaturalOrdering;
1413   B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1414   B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1415 
1416   B->assembled    = PETSC_TRUE;
1417   B->preallocated = PETSC_TRUE;
1418 
1419   PetscCall(PetscLogFlops(B->rmap->n));
1420 
1421   /* MatPivotView() */
1422   if (sctx.nshift) {
1423     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1424       PetscCall(PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top));
1425     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1426       PetscCall(PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount));
1427     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1428       PetscCall(PetscInfo(A,"number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n",sctx.nshift,(double)info->shiftamount));
1429     }
1430   }
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C,Mat A,const MatFactorInfo *info)
1435 {
1436   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ*)C->data;
1437   PetscInt       i,j,mbs = a->mbs;
1438   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
1439   PetscInt       k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz;
1440   MatScalar      *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
1441   PetscReal      rs;
1442   FactorShiftCtx sctx;
1443 
1444   PetscFunctionBegin;
1445   /* MatPivotSetUp(): initialize shift context sctx */
1446   PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx)));
1447 
1448   /* initialization */
1449   /* il and jl record the first nonzero element in each row of the accessing
1450      window U(0:k, k:mbs-1).
1451      jl:    list of rows to be added to uneliminated rows
1452             i>= k: jl(i) is the first row to be added to row i
1453             i<  k: jl(i) is the row following row i in some list of rows
1454             jl(i) = mbs indicates the end of a list
1455      il(i): points to the first nonzero element in U(i,k:mbs-1)
1456   */
1457   PetscCall(PetscMalloc1(mbs,&rtmp));
1458   PetscCall(PetscMalloc2(mbs,&il,mbs,&jl));
1459 
1460   do {
1461     sctx.newshift = PETSC_FALSE;
1462     il[0] = 0;
1463     for (i=0; i<mbs; i++) {
1464       rtmp[i] = 0.0; jl[i] = mbs;
1465     }
1466 
1467     for (k = 0; k<mbs; k++) {
1468       /*initialize k-th row with elements nonzero in row perm(k) of A */
1469       nz   = ai[k+1] - ai[k];
1470       acol = aj + ai[k];
1471       aval = aa + ai[k];
1472       bval = ba + bi[k];
1473       while (nz--) {
1474         rtmp[*acol++] = *aval++;
1475         *bval++       = 0.0; /* for in-place factorization */
1476       }
1477 
1478       /* shift the diagonal of the matrix */
1479       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1480 
1481       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1482       dk = rtmp[k];
1483       i  = jl[k]; /* first row to be added to k_th row  */
1484 
1485       while (i < k) {
1486         nexti = jl[i]; /* next row to be added to k_th row */
1487         /* compute multiplier, update D(k) and U(i,k) */
1488         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1489         uikdi   = -ba[ili]*ba[bi[i]];
1490         dk     += uikdi*ba[ili];
1491         ba[ili] = uikdi; /* -U(i,k) */
1492 
1493         /* add multiple of row i to k-th row ... */
1494         jmin = ili + 1;
1495         nz   = bi[i+1] - jmin;
1496         if (nz > 0) {
1497           bcol = bj + jmin;
1498           bval = ba + jmin;
1499           PetscCall(PetscLogFlops(2.0*nz));
1500           while (nz--) rtmp[*bcol++] += uikdi*(*bval++);
1501 
1502           /* update il and jl for i-th row */
1503           il[i] = jmin;
1504           j     = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1505         }
1506         i = nexti;
1507       }
1508 
1509       /* shift the diagonals when zero pivot is detected */
1510       /* compute rs=sum of abs(off-diagonal) */
1511       rs   = 0.0;
1512       jmin = bi[k]+1;
1513       nz   = bi[k+1] - jmin;
1514       if (nz) {
1515         bcol = bj + jmin;
1516         while (nz--) {
1517           rs += PetscAbsScalar(rtmp[*bcol]);
1518           bcol++;
1519         }
1520       }
1521 
1522       sctx.rs = rs;
1523       sctx.pv = dk;
1524       PetscCall(MatPivotCheck(C,A,info,&sctx,k));
1525       if (sctx.newshift) break;    /* sctx.shift_amount is updated */
1526       dk = sctx.pv;
1527 
1528       /* copy data into U(k,:) */
1529       ba[bi[k]] = 1.0/dk;
1530       jmin      = bi[k]+1;
1531       nz        = bi[k+1] - jmin;
1532       if (nz) {
1533         bcol = bj + jmin;
1534         bval = ba + jmin;
1535         while (nz--) {
1536           *bval++       = rtmp[*bcol];
1537           rtmp[*bcol++] = 0.0;
1538         }
1539         /* add k-th row into il and jl */
1540         il[k] = jmin;
1541         i     = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1542       }
1543     } /* end of for (k = 0; k<mbs; k++) */
1544   } while (sctx.newshift);
1545   PetscCall(PetscFree(rtmp));
1546   PetscCall(PetscFree2(il,jl));
1547 
1548   C->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1549   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1550   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1551   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1552   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1553 
1554   C->assembled    = PETSC_TRUE;
1555   C->preallocated = PETSC_TRUE;
1556 
1557   PetscCall(PetscLogFlops(C->rmap->N));
1558   if (sctx.nshift) {
1559     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1560       PetscCall(PetscInfo(A,"number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount));
1561     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1562       PetscCall(PetscInfo(A,"number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount));
1563     }
1564   }
1565   PetscFunctionReturn(0);
1566 }
1567 
1568 PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,const MatFactorInfo *info)
1569 {
1570   Mat            C;
1571 
1572   PetscFunctionBegin;
1573   PetscCall(MatGetFactor(A,"petsc",MAT_FACTOR_CHOLESKY,&C));
1574   PetscCall(MatCholeskyFactorSymbolic(C,A,perm,info));
1575   PetscCall(MatCholeskyFactorNumeric(C,A,info));
1576 
1577   A->ops->solve          = C->ops->solve;
1578   A->ops->solvetranspose = C->ops->solvetranspose;
1579 
1580   PetscCall(MatHeaderMerge(A,&C));
1581   PetscFunctionReturn(0);
1582 }
1583