xref: /petsc/src/mat/impls/baij/seq/baijfact.c (revision a5ff213d897f5fcaef42ae26f8ae0a8e003d03e2)
1 /*
2     Factorization code for BAIJ format.
3 */
4 #include "src/mat/impls/baij/seq/baij.h"
5 #include "src/inline/ilu.h"
6 
7 /* ------------------------------------------------------------*/
8 /*
9       Version for when blocks are 2 by 2
10 */
11 #undef __FUNCT__
12 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2"
13 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat A,MatFactorInfo *info,Mat *B)
14 {
15   Mat            C = *B;
16   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
17   IS             isrow = b->row,isicol = b->icol;
18   PetscErrorCode ierr;
19   PetscInt       *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
20   PetscInt       *ajtmpold,*ajtmp,nz,row;
21   PetscInt       *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj;
22   MatScalar      *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4;
23   MatScalar      p1,p2,p3,p4;
24   MatScalar      *ba = b->a,*aa = a->a;
25 
26   PetscFunctionBegin;
27   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
28   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
29   ierr  = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
30 
31   for (i=0; i<n; i++) {
32     nz    = bi[i+1] - bi[i];
33     ajtmp = bj + bi[i];
34     for  (j=0; j<nz; j++) {
35       x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0;
36     }
37     /* load in initial (unfactored row) */
38     idx      = r[i];
39     nz       = ai[idx+1] - ai[idx];
40     ajtmpold = aj + ai[idx];
41     v        = aa + 4*ai[idx];
42     for (j=0; j<nz; j++) {
43       x    = rtmp+4*ic[ajtmpold[j]];
44       x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3];
45       v    += 4;
46     }
47     row = *ajtmp++;
48     while (row < i) {
49       pc = rtmp + 4*row;
50       p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3];
51       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
52         pv = ba + 4*diag_offset[row];
53         pj = bj + diag_offset[row] + 1;
54         x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
55         pc[0] = m1 = p1*x1 + p3*x2;
56         pc[1] = m2 = p2*x1 + p4*x2;
57         pc[2] = m3 = p1*x3 + p3*x4;
58         pc[3] = m4 = p2*x3 + p4*x4;
59         nz = bi[row+1] - diag_offset[row] - 1;
60         pv += 4;
61         for (j=0; j<nz; j++) {
62           x1   = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
63           x    = rtmp + 4*pj[j];
64           x[0] -= m1*x1 + m3*x2;
65           x[1] -= m2*x1 + m4*x2;
66           x[2] -= m1*x3 + m3*x4;
67           x[3] -= m2*x3 + m4*x4;
68           pv   += 4;
69         }
70         PetscLogFlops(16*nz+12);
71       }
72       row = *ajtmp++;
73     }
74     /* finished row so stick it into b->a */
75     pv = ba + 4*bi[i];
76     pj = bj + bi[i];
77     nz = bi[i+1] - bi[i];
78     for (j=0; j<nz; j++) {
79       x     = rtmp+4*pj[j];
80       pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3];
81       pv   += 4;
82     }
83     /* invert diagonal block */
84     w = ba + 4*diag_offset[i];
85     ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr);
86   }
87 
88   ierr = PetscFree(rtmp);CHKERRQ(ierr);
89   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
90   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
91   C->factor = FACTOR_LU;
92   C->assembled = PETSC_TRUE;
93   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
94   PetscFunctionReturn(0);
95 }
96 /*
97       Version for when blocks are 2 by 2 Using natural ordering
98 */
99 #undef __FUNCT__
100 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering"
101 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,MatFactorInfo *info,Mat *B)
102 {
103   Mat            C = *B;
104   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
105   PetscErrorCode ierr;
106   PetscInt       i,j,n = a->mbs,*bi = b->i,*bj = b->j;
107   PetscInt       *ajtmpold,*ajtmp,nz,row;
108   PetscInt       *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj;
109   MatScalar      *pv,*v,*rtmp,*pc,*w,*x;
110   MatScalar      p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4;
111   MatScalar      *ba = b->a,*aa = a->a;
112 
113   PetscFunctionBegin;
114   ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
115 
116   for (i=0; i<n; i++) {
117     nz    = bi[i+1] - bi[i];
118     ajtmp = bj + bi[i];
119     for  (j=0; j<nz; j++) {
120       x = rtmp+4*ajtmp[j];
121       x[0]  = x[1]  = x[2]  = x[3]  = 0.0;
122     }
123     /* load in initial (unfactored row) */
124     nz       = ai[i+1] - ai[i];
125     ajtmpold = aj + ai[i];
126     v        = aa + 4*ai[i];
127     for (j=0; j<nz; j++) {
128       x    = rtmp+4*ajtmpold[j];
129       x[0]  = v[0];  x[1]  = v[1];  x[2]  = v[2];  x[3]  = v[3];
130       v    += 4;
131     }
132     row = *ajtmp++;
133     while (row < i) {
134       pc  = rtmp + 4*row;
135       p1  = pc[0];  p2  = pc[1];  p3  = pc[2];  p4  = pc[3];
136       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
137         pv = ba + 4*diag_offset[row];
138         pj = bj + diag_offset[row] + 1;
139         x1  = pv[0];  x2  = pv[1];  x3  = pv[2];  x4  = pv[3];
140         pc[0] = m1 = p1*x1 + p3*x2;
141         pc[1] = m2 = p2*x1 + p4*x2;
142         pc[2] = m3 = p1*x3 + p3*x4;
143         pc[3] = m4 = p2*x3 + p4*x4;
144         nz = bi[row+1] - diag_offset[row] - 1;
145         pv += 4;
146         for (j=0; j<nz; j++) {
147           x1   = pv[0];  x2  = pv[1];   x3 = pv[2];  x4  = pv[3];
148           x    = rtmp + 4*pj[j];
149           x[0] -= m1*x1 + m3*x2;
150           x[1] -= m2*x1 + m4*x2;
151           x[2] -= m1*x3 + m3*x4;
152           x[3] -= m2*x3 + m4*x4;
153           pv   += 4;
154         }
155         PetscLogFlops(16*nz+12);
156       }
157       row = *ajtmp++;
158     }
159     /* finished row so stick it into b->a */
160     pv = ba + 4*bi[i];
161     pj = bj + bi[i];
162     nz = bi[i+1] - bi[i];
163     for (j=0; j<nz; j++) {
164       x      = rtmp+4*pj[j];
165       pv[0]  = x[0];  pv[1]  = x[1];  pv[2]  = x[2];  pv[3]  = x[3];
166       pv   += 4;
167     }
168     /* invert diagonal block */
169     w = ba + 4*diag_offset[i];
170     ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr);
171     /*Kernel_A_gets_inverse_A(bs,w,v_pivots,v_work);*/
172   }
173 
174   ierr = PetscFree(rtmp);CHKERRQ(ierr);
175   C->factor    = FACTOR_LU;
176   C->assembled = PETSC_TRUE;
177   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
178   PetscFunctionReturn(0);
179 }
180 
181 /* ----------------------------------------------------------- */
182 /*
183      Version for when blocks are 1 by 1.
184 */
185 #undef __FUNCT__
186 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_1"
187 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat A,MatFactorInfo *info,Mat *B)
188 {
189   Mat            C = *B;
190   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
191   IS             isrow = b->row,isicol = b->icol;
192   PetscErrorCode ierr;
193   PetscInt       *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
194   PetscInt       *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j;
195   PetscInt       *diag_offset = b->diag,diag,*pj;
196   MatScalar      *pv,*v,*rtmp,multiplier,*pc;
197   MatScalar      *ba = b->a,*aa = a->a;
198 
199   PetscFunctionBegin;
200   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
201   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
202   ierr  = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
203 
204   for (i=0; i<n; i++) {
205     nz    = bi[i+1] - bi[i];
206     ajtmp = bj + bi[i];
207     for  (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0;
208 
209     /* load in initial (unfactored row) */
210     nz       = ai[r[i]+1] - ai[r[i]];
211     ajtmpold = aj + ai[r[i]];
212     v        = aa + ai[r[i]];
213     for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] =  v[j];
214 
215     row = *ajtmp++;
216     while (row < i) {
217       pc = rtmp + row;
218       if (*pc != 0.0) {
219         pv         = ba + diag_offset[row];
220         pj         = bj + diag_offset[row] + 1;
221         multiplier = *pc * *pv++;
222         *pc        = multiplier;
223         nz         = bi[row+1] - diag_offset[row] - 1;
224         for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
225         PetscLogFlops(1+2*nz);
226       }
227       row = *ajtmp++;
228     }
229     /* finished row so stick it into b->a */
230     pv = ba + bi[i];
231     pj = bj + bi[i];
232     nz = bi[i+1] - bi[i];
233     for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];}
234     diag = diag_offset[i] - bi[i];
235     /* check pivot entry for current row */
236     if (pv[diag] == 0.0) {
237       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
238     }
239     pv[diag] = 1.0/pv[diag];
240   }
241 
242   ierr = PetscFree(rtmp);CHKERRQ(ierr);
243   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
245   C->factor    = FACTOR_LU;
246   C->assembled = PETSC_TRUE;
247   PetscLogFlops(C->n);
248   PetscFunctionReturn(0);
249 }
250 
251 
252 /* ----------------------------------------------------------- */
253 #undef __FUNCT__
254 #define __FUNCT__ "MatLUFactor_SeqBAIJ"
255 PetscErrorCode MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
256 {
257   PetscErrorCode ierr;
258   Mat            C;
259 
260   PetscFunctionBegin;
261   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
262   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
263   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
264   PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol);
265   PetscFunctionReturn(0);
266 }
267 
268 #include "src/mat/impls/sbaij/seq/sbaij.h"
269 #undef __FUNCT__
270 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N"
271 PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat A,MatFactorInfo *info,Mat *B)
272 {
273   PetscErrorCode ierr;
274   Mat            C = *B;
275   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
276   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
277   IS             ip=b->row;
278   PetscInt       *rip,i,j,mbs=a->mbs,bs=A->bs,*bi=b->i,*bj=b->j,*bcol;
279   PetscInt       *ai=a->i,*aj=a->j;
280   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
281   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
282   PetscReal      damping=b->factor_damping,zeropivot=b->factor_zeropivot,shift_amount;
283   PetscTruth     damp,chshift;
284   PetscInt       nshift=0,ndamp=0;
285 
286   PetscFunctionBegin;
287   if (bs > 1) {
288     if (!a->sbaijMat){
289       ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr);
290     }
291     ierr = (a->sbaijMat)->ops->choleskyfactornumeric(a->sbaijMat,info,B);CHKERRQ(ierr);
292     ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);
293     a->sbaijMat = PETSC_NULL;
294     PetscFunctionReturn(0);
295   }
296 
297   /* initialization */
298   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
299   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
300   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
301   jl   = il + mbs;
302   rtmp = (MatScalar*)(jl + mbs);
303 
304   shift_amount = 0;
305   do {
306     damp = PETSC_FALSE;
307     chshift = PETSC_FALSE;
308     for (i=0; i<mbs; i++) {
309       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
310     }
311 
312     for (k = 0; k<mbs; k++){
313       bval = ba + bi[k];
314       /* initialize k-th row by the perm[k]-th row of A */
315       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
316       for (j = jmin; j < jmax; j++){
317         col = rip[aj[j]];
318         if (col >= k){ /* only take upper triangular entry */
319           rtmp[col] = aa[j];
320           *bval++  = 0.0; /* for in-place factorization */
321         }
322       }
323       /* damp the diagonal of the matrix */
324       if (ndamp||nshift) rtmp[k] += damping+shift_amount;
325 
326       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
327       dk = rtmp[k];
328       i = jl[k]; /* first row to be added to k_th row  */
329 
330       while (i < k){
331         nexti = jl[i]; /* next row to be added to k_th row */
332 
333         /* compute multiplier, update diag(k) and U(i,k) */
334         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
335         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
336         dk += uikdi*ba[ili];
337         ba[ili] = uikdi; /* -U(i,k) */
338 
339         /* add multiple of row i to k-th row */
340         jmin = ili + 1; jmax = bi[i+1];
341         if (jmin < jmax){
342           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
343           /* update il and jl for row i */
344           il[i] = jmin;
345           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
346         }
347         i = nexti;
348       }
349 
350       if (PetscRealPart(dk) < zeropivot && b->factor_shift){
351 	/* calculate a shift that would make this row diagonally dominant */
352 	PetscReal rs = PetscAbs(PetscRealPart(dk));
353 	jmin      = bi[k]+1;
354 	nz        = bi[k+1] - jmin;
355 	if (nz){
356 	  bcol = bj + jmin;
357 	  bval = ba + jmin;
358 	  while (nz--){
359 	    rs += PetscAbsScalar(rtmp[*bcol++]);
360 	  }
361 	}
362 	/* if this shift is less than the previous, just up the previous
363 	   one by a bit */
364 	shift_amount = PetscMax(rs,1.1*shift_amount);
365 	chshift  = PETSC_TRUE;
366 	/* Unlike in the ILU case there is no exit condition on nshift:
367 	   we increase the shift until it converges. There is no guarantee that
368 	   this algorithm converges faster or slower, or is better or worse
369 	   than the ILU algorithm. */
370 	nshift++;
371 	break;
372       }
373       if (PetscRealPart(dk) < zeropivot){
374         if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1);
375         if (damping > 0.0) {
376           if (ndamp) damping *= 2.0;
377           damp = PETSC_TRUE;
378           ndamp++;
379           break;
380         } else if (PetscAbsScalar(dk) < zeropivot){
381           SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot);
382         } else {
383           PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k);
384         }
385       }
386 
387       /* copy data into U(k,:) */
388       ba[bi[k]] = 1.0/dk; /* U(k,k) */
389       jmin = bi[k]+1; jmax = bi[k+1];
390       if (jmin < jmax) {
391         for (j=jmin; j<jmax; j++){
392           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
393         }
394         /* add the k-th row into il and jl */
395         il[k] = jmin;
396         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
397       }
398     }
399   } while (damp||chshift);
400   ierr = PetscFree(il);CHKERRQ(ierr);
401 
402   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
403   C->factor       = FACTOR_CHOLESKY;
404   C->assembled    = PETSC_TRUE;
405   C->preallocated = PETSC_TRUE;
406   PetscLogFlops(C->m);
407   if (ndamp) {
408     PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqBAIJ: number of damping tries %D damping value %g\n",ndamp,damping);
409   }
410   if (nshift) {
411     PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqBAIJ diagonal shifted %D shifts\n",nshift);
412   }
413   PetscFunctionReturn(0);
414 }
415 
416 #undef __FUNCT__
417 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering"
418 PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat A,MatFactorInfo *info,Mat *fact)
419 {
420   Mat            C = *fact;
421   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
422   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
423   PetscErrorCode ierr;
424   PetscInt       i,j,am=a->mbs,bs=A->bs;
425   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
426   PetscInt       k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0;
427   MatScalar      *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
428   PetscReal      damping=b->factor_damping, zeropivot=b->factor_zeropivot,shift_amount;
429   PetscTruth     damp,chshift;
430   PetscInt       nshift=0;
431 
432   PetscFunctionBegin;
433   if (bs > 1) {
434     SETERRQ(PETSC_ERR_USER,"not done yet");
435   }
436 
437   /* initialization */
438   nz   = (2*am+1)*sizeof(PetscInt)+am*sizeof(MatScalar);
439   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
440   jl   = il + am;
441   rtmp = (MatScalar*)(jl + am);
442 
443   shift_amount = 0;
444   do {
445     damp = PETSC_FALSE;
446     chshift = PETSC_FALSE;
447     for (i=0; i<am; i++) {
448       rtmp[i] = 0.0; jl[i] = am; il[0] = 0;
449     }
450 
451     for (k = 0; k<am; k++){
452     /* initialize k-th row with elements nonzero in row perm(k) of A */
453       nz   = ai[k+1] - ai[k];
454       acol = aj + ai[k];
455       aval = aa + ai[k];
456       bval = ba + bi[k];
457       while (nz -- ){
458         if (*acol < k) { /* skip lower triangular entries */
459           acol++; aval++;
460         } else {
461           rtmp[*acol++] = *aval++;
462           *bval++       = 0.0; /* for in-place factorization */
463         }
464       }
465       /* damp the diagonal of the matrix */
466       if (ndamp||nshift) rtmp[k] += damping+shift_amount;
467 
468       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
469       dk = rtmp[k];
470       i  = jl[k]; /* first row to be added to k_th row  */
471 
472       while (i < k){
473         nexti = jl[i]; /* next row to be added to k_th row */
474         /* compute multiplier, update D(k) and U(i,k) */
475         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
476         uikdi = - ba[ili]*ba[bi[i]];
477         dk   += uikdi*ba[ili];
478         ba[ili] = uikdi; /* -U(i,k) */
479 
480         /* add multiple of row i to k-th row ... */
481         jmin = ili + 1;
482         nz   = bi[i+1] - jmin;
483         if (nz > 0){
484           bcol = bj + jmin;
485           bval = ba + jmin;
486           while (nz --) rtmp[*bcol++] += uikdi*(*bval++);
487           /* update il and jl for i-th row */
488           il[i] = jmin;
489           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
490         }
491         i = nexti;
492       }
493 
494       if (PetscRealPart(dk) < zeropivot && b->factor_shift){
495 	/* calculate a shift that would make this row diagonally dominant */
496 	PetscReal rs = PetscAbs(PetscRealPart(dk));
497 	jmin      = bi[k]+1;
498 	nz        = bi[k+1] - jmin;
499 	if (nz){
500 	  bcol = bj + jmin;
501 	  bval = ba + jmin;
502 	  while (nz--){
503 	    rs += PetscAbsScalar(rtmp[*bcol++]);
504 	  }
505 	}
506 	/* if this shift is less than the previous, just up the previous
507 	   one by a bit */
508 	shift_amount = PetscMax(rs,1.1*shift_amount);
509 	chshift  = PETSC_TRUE;
510 	/* Unlike in the ILU case there is no exit condition on nshift:
511 	   we increase the shift until it converges. There is no guarantee that
512 	   this algorithm converges faster or slower, or is better or worse
513 	   than the ILU algorithm. */
514 	nshift++;
515 	break;
516       }
517       if (PetscRealPart(dk) < zeropivot){
518         if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1);
519         if (damping > 0.0) {
520           if (ndamp) damping *= 2.0;
521           damp = PETSC_TRUE;
522           ndamp++;
523           break;
524         } else if (PetscAbsScalar(dk) < zeropivot){
525           SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot);
526         } else {
527           PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k);
528         }
529       }
530 
531       /* copy data into U(k,:) */
532       ba[bi[k]] = 1.0/dk;
533       jmin      = bi[k]+1;
534       nz        = bi[k+1] - jmin;
535       if (nz){
536         bcol = bj + jmin;
537         bval = ba + jmin;
538         while (nz--){
539           *bval++       = rtmp[*bcol];
540           rtmp[*bcol++] = 0.0;
541         }
542         /* add k-th row into il and jl */
543         il[k] = jmin;
544         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
545       }
546     }
547   } while (damp||chshift);
548   ierr = PetscFree(il);CHKERRQ(ierr);
549 
550   C->factor       = FACTOR_CHOLESKY;
551   C->assembled    = PETSC_TRUE;
552   C->preallocated = PETSC_TRUE;
553   PetscLogFlops(C->m);
554   if (ndamp) {
555     PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqBAIJ_1_NaturalOrdering: number of damping tries %D damping value %g\n",ndamp,damping);
556   }
557   if (nshift) {
558     PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqBAIJ_1_NaturalOrdering diagonal shifted %D shifts\n",nshift);
559   }
560   PetscFunctionReturn(0);
561 }
562 
563 #include "petscbt.h"
564 #include "src/mat/utils/freespace.h"
565 #undef __FUNCT__
566 #define __FUNCT__ "MatICCFactorSymbolic_SeqBAIJ"
567 PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
568 {
569   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
570   Mat_SeqSBAIJ   *b;
571   Mat            B;
572   PetscErrorCode ierr;
573   PetscTruth     perm_identity;
574   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=a->mbs,bs=A->bs,*ui;
575   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
576   PetscInt       nlnk,*lnk,*lnk_lvl,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
577   PetscReal      fill=info->fill,levels=info->levels;
578   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
579   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
580   PetscBT        lnkbt;
581 
582   PetscFunctionBegin;
583   if (bs > 1){
584     if (!a->sbaijMat){
585       ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr);
586     }
587     ierr = MatICCFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr);
588     B = *fact;
589     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
590     PetscFunctionReturn(0);
591   }
592 
593   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
594   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
595 
596   /* special case that simply copies fill pattern */
597   if (!levels && perm_identity) {
598     ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
599     ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
600     for (i=0; i<am; i++) {
601       ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */
602     }
603     ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
604     B = *fact;
605     ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
606     ierr = MatSeqSBAIJSetPreallocation(B,1,0,ui);CHKERRQ(ierr);
607 
608     b  = (Mat_SeqSBAIJ*)B->data;
609     uj = b->j;
610     for (i=0; i<am; i++) {
611       aj = a->j + a->diag[i];
612       for (j=0; j<ui[i]; j++){
613         *uj++ = *aj++;
614       }
615       b->ilen[i] = ui[i];
616     }
617     ierr = PetscFree(ui);CHKERRQ(ierr);
618     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
619     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
620 
621     B->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
622     B->ops->solvetranspose        = MatSolve_SeqSBAIJ_1_NaturalOrdering;
623     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
624     PetscFunctionReturn(0);
625   }
626 
627   /* initialization */
628   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
629   ui[0] = 0;
630   ierr  = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr);
631 
632   /* jl: linked list for storing indices of the pivot rows
633      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
634   ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
635   il         = jl + am;
636   uj_ptr     = (PetscInt**)(il + am);
637   uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
638   for (i=0; i<am; i++){
639     jl[i] = am; il[i] = 0;
640   }
641 
642   /* create and initialize a linked list for storing column indices of the active row k */
643   nlnk = am + 1;
644   ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
645 
646   /* initial FreeSpace size is fill*(ai[am]+1) */
647   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
648   current_space = free_space;
649   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
650   current_space_lvl = free_space_lvl;
651 
652   for (k=0; k<am; k++){  /* for each active row k */
653     /* initialize lnk by the column indices of row rip[k] of A */
654     nzk   = 0;
655     ncols = ai[rip[k]+1] - ai[rip[k]];
656     ncols_upper = 0;
657     cols        = cols_lvl + am;
658     for (j=0; j<ncols; j++){
659       i = rip[*(aj + ai[rip[k]] + j)];
660       if (i >= k){ /* only take upper triangular entry */
661         cols[ncols_upper] = i;
662         cols_lvl[ncols_upper] = -1;  /* initialize level for nonzero entries */
663         ncols_upper++;
664       }
665     }
666     ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
667     nzk += nlnk;
668 
669     /* update lnk by computing fill-in for each pivot row to be merged in */
670     prow = jl[k]; /* 1st pivot row */
671 
672     while (prow < k){
673       nextprow = jl[prow];
674 
675       /* merge prow into k-th row */
676       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
677       jmax = ui[prow+1];
678       ncols = jmax-jmin;
679       i     = jmin - ui[prow];
680       cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
681       for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j);
682       ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
683       nzk += nlnk;
684 
685       /* update il and jl for prow */
686       if (jmin < jmax){
687         il[prow] = jmin;
688         j = *cols; jl[prow] = jl[j]; jl[j] = prow;
689       }
690       prow = nextprow;
691     }
692 
693     /* if free space is not available, make more free space */
694     if (current_space->local_remaining<nzk) {
695       i = am - k + 1; /* num of unfactored rows */
696       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
697       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
698       ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
699       reallocs++;
700     }
701 
702     /* copy data into free_space and free_space_lvl, then initialize lnk */
703     ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
704 
705     /* add the k-th row into il and jl */
706     if (nzk-1 > 0){
707       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
708       jl[k] = jl[i]; jl[i] = k;
709       il[k] = ui[k] + 1;
710     }
711     uj_ptr[k]     = current_space->array;
712     uj_lvl_ptr[k] = current_space_lvl->array;
713 
714     current_space->array           += nzk;
715     current_space->local_used      += nzk;
716     current_space->local_remaining -= nzk;
717 
718     current_space_lvl->array           += nzk;
719     current_space_lvl->local_used      += nzk;
720     current_space_lvl->local_remaining -= nzk;
721 
722     ui[k+1] = ui[k] + nzk;
723   }
724 
725   if (ai[am] != 0) {
726     PetscReal af = ((PetscReal)(2*ui[am]-am))/((PetscReal)ai[am]);
727     PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
728     PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
729     PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
730   } else {
731      PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:Empty matrix.\n");
732   }
733 
734   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
735   ierr = PetscFree(jl);CHKERRQ(ierr);
736   ierr = PetscFree(cols_lvl);CHKERRQ(ierr);
737 
738   /* destroy list of free space and other temporary array(s) */
739   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
740   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
741   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
742   ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
743 
744   /* put together the new matrix in MATSEQSBAIJ format */
745   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
746   B = *fact;
747   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
748   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
749 
750   b = (Mat_SeqSBAIJ*)B->data;
751   ierr = PetscFree(b->imax);CHKERRQ(ierr);
752   b->singlemalloc = PETSC_FALSE;
753   /* the next line frees the default space generated by the Create() */
754   ierr = PetscFree(b->a);CHKERRQ(ierr);
755   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
756   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
757   b->j    = uj;
758   b->i    = ui;
759   b->diag = 0;
760   b->ilen = 0;
761   b->imax = 0;
762   b->row  = perm;
763   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
764   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
765   b->icol = perm;
766   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
767   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
768   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
769   b->maxnz = b->nz = ui[am];
770 
771   B->factor                 = FACTOR_CHOLESKY;
772   B->info.factor_mallocs    = reallocs;
773   B->info.fill_ratio_given  = fill;
774   if (ai[am] != 0) {
775     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
776   } else {
777     B->info.fill_ratio_needed = 0.0;
778   }
779   if (perm_identity){
780     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
781     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
782     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
783   } else {
784     (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
785   }
786   PetscFunctionReturn(0);
787 }
788 
789 #undef __FUNCT__
790 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqBAIJ"
791 PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
792 {
793   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
794   Mat_SeqSBAIJ   *b;
795   Mat            B;
796   PetscErrorCode ierr;
797   PetscTruth     perm_identity;
798   PetscReal      fill = info->fill;
799   PetscInt       *rip,*riip,i,mbs=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j,reallocs=0,prow;
800   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
801   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
802   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
803   PetscBT        lnkbt;
804   IS             iperm;
805 
806   PetscFunctionBegin;
807   if (bs > 1) { /* convert to seqsbaij */
808     if (!a->sbaijMat){
809       ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr);
810     }
811     ierr = MatCholeskyFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr);
812     B    = *fact;
813     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
814     PetscFunctionReturn(0);
815   }
816 
817   /* check whether perm is the identity mapping */
818   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
819   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
820 
821   if (!perm_identity){
822     /* check if perm is symmetric! */
823     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
824     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
825     for (i=0; i<mbs; i++) {
826       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
827     }
828     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
829     ierr = ISDestroy(iperm);CHKERRQ(ierr);
830   }
831 
832   /* initialization */
833   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
834   ui[0] = 0;
835 
836   /* jl: linked list for storing indices of the pivot rows
837      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
838   ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
839   il     = jl + mbs;
840   cols   = il + mbs;
841   ui_ptr = (PetscInt**)(cols + mbs);
842   for (i=0; i<mbs; i++){
843     jl[i] = mbs; il[i] = 0;
844   }
845 
846   /* create and initialize a linked list for storing column indices of the active row k */
847   nlnk = mbs + 1;
848   ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
849 
850   /* initial FreeSpace size is fill*(ai[mbs]+1) */
851   ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr);
852   current_space = free_space;
853 
854   for (k=0; k<mbs; k++){  /* for each active row k */
855     /* initialize lnk by the column indices of row rip[k] of A */
856     nzk   = 0;
857     ncols = ai[rip[k]+1] - ai[rip[k]];
858     ncols_upper = 0;
859     for (j=0; j<ncols; j++){
860       i = rip[*(aj + ai[rip[k]] + j)];
861       if (i >= k){ /* only take upper triangular entry */
862         cols[ncols_upper] = i;
863         ncols_upper++;
864       }
865     }
866     ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
867     nzk += nlnk;
868 
869     /* update lnk by computing fill-in for each pivot row to be merged in */
870     prow = jl[k]; /* 1st pivot row */
871 
872     while (prow < k){
873       nextprow = jl[prow];
874       /* merge prow into k-th row */
875       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
876       jmax = ui[prow+1];
877       ncols = jmax-jmin;
878       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
879       ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
880       nzk += nlnk;
881 
882       /* update il and jl for prow */
883       if (jmin < jmax){
884         il[prow] = jmin;
885         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
886       }
887       prow = nextprow;
888     }
889 
890     /* if free space is not available, make more free space */
891     if (current_space->local_remaining<nzk) {
892       i = mbs - k + 1; /* num of unfactored rows */
893       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
894       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
895       reallocs++;
896     }
897 
898     /* copy data into free space, then initialize lnk */
899     ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
900 
901     /* add the k-th row into il and jl */
902     if (nzk-1 > 0){
903       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
904       jl[k] = jl[i]; jl[i] = k;
905       il[k] = ui[k] + 1;
906     }
907     ui_ptr[k] = current_space->array;
908     current_space->array           += nzk;
909     current_space->local_used      += nzk;
910     current_space->local_remaining -= nzk;
911 
912     ui[k+1] = ui[k] + nzk;
913   }
914 
915   if (ai[mbs] != 0) {
916     PetscReal af = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
917     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
918     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
919     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
920   } else {
921      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
922   }
923 
924   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
925   ierr = PetscFree(jl);CHKERRQ(ierr);
926 
927   /* destroy list of free space and other temporary array(s) */
928   ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
929   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
930   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
931 
932   /* put together the new matrix in MATSEQSBAIJ format */
933   ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr);
934   B    = *fact;
935   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
936   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,PETSC_NULL);CHKERRQ(ierr);
937 
938   b = (Mat_SeqSBAIJ*)B->data;
939   ierr = PetscFree(b->imax);CHKERRQ(ierr);
940   b->singlemalloc = PETSC_FALSE;
941   /* the next line frees the default space generated by the Create() */
942   ierr = PetscFree(b->a);CHKERRQ(ierr);
943   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
944   ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
945   b->j    = uj;
946   b->i    = ui;
947   b->diag = 0;
948   b->ilen = 0;
949   b->imax = 0;
950   b->row  = perm;
951   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
952   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
953   b->icol = perm;
954   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
955   ierr    = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
956   PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
957   b->maxnz = b->nz = ui[mbs];
958 
959   B->factor                 = FACTOR_CHOLESKY;
960   B->info.factor_mallocs    = reallocs;
961   B->info.fill_ratio_given  = fill;
962   if (ai[mbs] != 0) {
963     B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
964   } else {
965     B->info.fill_ratio_needed = 0.0;
966   }
967   if (perm_identity){
968     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
969     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
970     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
971   } else {
972     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
973   }
974   PetscFunctionReturn(0);
975 }
976