xref: /petsc/src/mat/impls/baij/seq/baijfact.c (revision d4dbac3cd6da13dc60b5d653f928051cfef0d71e)
1 /*
2     Factorization code for BAIJ format.
3 */
4 #include "src/mat/impls/baij/seq/baij.h"
5 #include "src/inline/ilu.h"
6 
7 /* ------------------------------------------------------------*/
8 /*
9       Version for when blocks are 2 by 2
10 */
11 #undef __FUNCT__
12 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2"
13 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat A,Mat *B)
14 {
15   Mat            C = *B;
16   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
17   IS             isrow = b->row,isicol = b->icol;
18   PetscErrorCode ierr;
19   PetscInt       *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
20   PetscInt       *ajtmpold,*ajtmp,nz,row;
21   PetscInt       *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj;
22   MatScalar      *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4;
23   MatScalar      p1,p2,p3,p4;
24   MatScalar      *ba = b->a,*aa = a->a;
25 
26   PetscFunctionBegin;
27   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
28   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
29   ierr  = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
30 
31   for (i=0; i<n; i++) {
32     nz    = bi[i+1] - bi[i];
33     ajtmp = bj + bi[i];
34     for  (j=0; j<nz; j++) {
35       x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0;
36     }
37     /* load in initial (unfactored row) */
38     idx      = r[i];
39     nz       = ai[idx+1] - ai[idx];
40     ajtmpold = aj + ai[idx];
41     v        = aa + 4*ai[idx];
42     for (j=0; j<nz; j++) {
43       x    = rtmp+4*ic[ajtmpold[j]];
44       x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3];
45       v    += 4;
46     }
47     row = *ajtmp++;
48     while (row < i) {
49       pc = rtmp + 4*row;
50       p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3];
51       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
52         pv = ba + 4*diag_offset[row];
53         pj = bj + diag_offset[row] + 1;
54         x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
55         pc[0] = m1 = p1*x1 + p3*x2;
56         pc[1] = m2 = p2*x1 + p4*x2;
57         pc[2] = m3 = p1*x3 + p3*x4;
58         pc[3] = m4 = p2*x3 + p4*x4;
59         nz = bi[row+1] - diag_offset[row] - 1;
60         pv += 4;
61         for (j=0; j<nz; j++) {
62           x1   = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
63           x    = rtmp + 4*pj[j];
64           x[0] -= m1*x1 + m3*x2;
65           x[1] -= m2*x1 + m4*x2;
66           x[2] -= m1*x3 + m3*x4;
67           x[3] -= m2*x3 + m4*x4;
68           pv   += 4;
69         }
70         PetscLogFlops(16*nz+12);
71       }
72       row = *ajtmp++;
73     }
74     /* finished row so stick it into b->a */
75     pv = ba + 4*bi[i];
76     pj = bj + bi[i];
77     nz = bi[i+1] - bi[i];
78     for (j=0; j<nz; j++) {
79       x     = rtmp+4*pj[j];
80       pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3];
81       pv   += 4;
82     }
83     /* invert diagonal block */
84     w = ba + 4*diag_offset[i];
85     ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr);
86   }
87 
88   ierr = PetscFree(rtmp);CHKERRQ(ierr);
89   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
90   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
91   C->factor = FACTOR_LU;
92   C->assembled = PETSC_TRUE;
93   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
94   PetscFunctionReturn(0);
95 }
96 /*
97       Version for when blocks are 2 by 2 Using natural ordering
98 */
99 #undef __FUNCT__
100 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering"
101 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,Mat *B)
102 {
103   Mat            C = *B;
104   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
105   PetscErrorCode ierr;
106   PetscInt       i,j,n = a->mbs,*bi = b->i,*bj = b->j;
107   PetscInt       *ajtmpold,*ajtmp,nz,row;
108   PetscInt       *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj;
109   MatScalar      *pv,*v,*rtmp,*pc,*w,*x;
110   MatScalar      p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4;
111   MatScalar      *ba = b->a,*aa = a->a;
112 
113   PetscFunctionBegin;
114   ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
115 
116   for (i=0; i<n; i++) {
117     nz    = bi[i+1] - bi[i];
118     ajtmp = bj + bi[i];
119     for  (j=0; j<nz; j++) {
120       x = rtmp+4*ajtmp[j];
121       x[0]  = x[1]  = x[2]  = x[3]  = 0.0;
122     }
123     /* load in initial (unfactored row) */
124     nz       = ai[i+1] - ai[i];
125     ajtmpold = aj + ai[i];
126     v        = aa + 4*ai[i];
127     for (j=0; j<nz; j++) {
128       x    = rtmp+4*ajtmpold[j];
129       x[0]  = v[0];  x[1]  = v[1];  x[2]  = v[2];  x[3]  = v[3];
130       v    += 4;
131     }
132     row = *ajtmp++;
133     while (row < i) {
134       pc  = rtmp + 4*row;
135       p1  = pc[0];  p2  = pc[1];  p3  = pc[2];  p4  = pc[3];
136       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
137         pv = ba + 4*diag_offset[row];
138         pj = bj + diag_offset[row] + 1;
139         x1  = pv[0];  x2  = pv[1];  x3  = pv[2];  x4  = pv[3];
140         pc[0] = m1 = p1*x1 + p3*x2;
141         pc[1] = m2 = p2*x1 + p4*x2;
142         pc[2] = m3 = p1*x3 + p3*x4;
143         pc[3] = m4 = p2*x3 + p4*x4;
144         nz = bi[row+1] - diag_offset[row] - 1;
145         pv += 4;
146         for (j=0; j<nz; j++) {
147           x1   = pv[0];  x2  = pv[1];   x3 = pv[2];  x4  = pv[3];
148           x    = rtmp + 4*pj[j];
149           x[0] -= m1*x1 + m3*x2;
150           x[1] -= m2*x1 + m4*x2;
151           x[2] -= m1*x3 + m3*x4;
152           x[3] -= m2*x3 + m4*x4;
153           pv   += 4;
154         }
155         PetscLogFlops(16*nz+12);
156       }
157       row = *ajtmp++;
158     }
159     /* finished row so stick it into b->a */
160     pv = ba + 4*bi[i];
161     pj = bj + bi[i];
162     nz = bi[i+1] - bi[i];
163     for (j=0; j<nz; j++) {
164       x      = rtmp+4*pj[j];
165       pv[0]  = x[0];  pv[1]  = x[1];  pv[2]  = x[2];  pv[3]  = x[3];
166       pv   += 4;
167     }
168     /* invert diagonal block */
169     w = ba + 4*diag_offset[i];
170     ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr);
171     /*Kernel_A_gets_inverse_A(bs,w,v_pivots,v_work);*/
172   }
173 
174   ierr = PetscFree(rtmp);CHKERRQ(ierr);
175   C->factor    = FACTOR_LU;
176   C->assembled = PETSC_TRUE;
177   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
178   PetscFunctionReturn(0);
179 }
180 
181 /* ----------------------------------------------------------- */
182 /*
183      Version for when blocks are 1 by 1.
184 */
185 #undef __FUNCT__
186 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_1"
187 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat A,Mat *B)
188 {
189   Mat            C = *B;
190   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
191   IS             isrow = b->row,isicol = b->icol;
192   PetscErrorCode ierr;
193   PetscInt       *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
194   PetscInt       *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j;
195   PetscInt       *diag_offset = b->diag,diag,*pj;
196   MatScalar      *pv,*v,*rtmp,multiplier,*pc;
197   MatScalar      *ba = b->a,*aa = a->a;
198 
199   PetscFunctionBegin;
200   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
201   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
202   ierr  = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
203 
204   for (i=0; i<n; i++) {
205     nz    = bi[i+1] - bi[i];
206     ajtmp = bj + bi[i];
207     for  (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0;
208 
209     /* load in initial (unfactored row) */
210     nz       = ai[r[i]+1] - ai[r[i]];
211     ajtmpold = aj + ai[r[i]];
212     v        = aa + ai[r[i]];
213     for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] =  v[j];
214 
215     row = *ajtmp++;
216     while (row < i) {
217       pc = rtmp + row;
218       if (*pc != 0.0) {
219         pv         = ba + diag_offset[row];
220         pj         = bj + diag_offset[row] + 1;
221         multiplier = *pc * *pv++;
222         *pc        = multiplier;
223         nz         = bi[row+1] - diag_offset[row] - 1;
224         for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
225         PetscLogFlops(1+2*nz);
226       }
227       row = *ajtmp++;
228     }
229     /* finished row so stick it into b->a */
230     pv = ba + bi[i];
231     pj = bj + bi[i];
232     nz = bi[i+1] - bi[i];
233     for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];}
234     diag = diag_offset[i] - bi[i];
235     /* check pivot entry for current row */
236     if (pv[diag] == 0.0) {
237       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
238     }
239     pv[diag] = 1.0/pv[diag];
240   }
241 
242   ierr = PetscFree(rtmp);CHKERRQ(ierr);
243   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
245   C->factor    = FACTOR_LU;
246   C->assembled = PETSC_TRUE;
247   PetscLogFlops(C->n);
248   PetscFunctionReturn(0);
249 }
250 
251 
252 /* ----------------------------------------------------------- */
253 #undef __FUNCT__
254 #define __FUNCT__ "MatLUFactor_SeqBAIJ"
255 PetscErrorCode MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
256 {
257   PetscErrorCode ierr;
258   Mat            C;
259 
260   PetscFunctionBegin;
261   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
262   ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr);
263   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
264   PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol);
265   PetscFunctionReturn(0);
266 }
267 
268 #include "src/mat/impls/sbaij/seq/sbaij.h"
269 #undef __FUNCT__
270 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N"
271 PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat A,Mat *B)
272 {
273   Mat            C = *B;
274   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
275   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
276   IS             ip=b->row;
277   PetscErrorCode ierr,(*f)(Mat,Mat*);
278   PetscInt       *rip,i,j,mbs=a->mbs,bs=A->bs,*bi=b->i,*bj=b->j,*bcol;
279   PetscInt       *ai=a->i,*aj=a->j;
280   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
281   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
282   PetscReal      damping=b->factor_damping,zeropivot=b->factor_zeropivot,shift_amount;
283   PetscTruth     damp,chshift;
284   PetscInt       nshift=0,ndamp=0;
285 
286   PetscFunctionBegin;
287   if (bs > 1) {
288     if (!a->sbaijMat){
289       ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr);
290     }
291     ierr = PetscObjectQueryFunction((PetscObject)C,"MatCholeskyFactorNumeric",(void (**)(void))&f);CHKERRQ(ierr);
292     ierr = (*f)(a->sbaijMat,B);CHKERRQ(ierr);
293     ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);
294     a->sbaijMat = PETSC_NULL;
295     PetscFunctionReturn(0);
296   }
297 
298   /* initialization */
299   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
300   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
301   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
302   jl   = il + mbs;
303   rtmp = (MatScalar*)(jl + mbs);
304 
305   shift_amount = 0;
306   do {
307     damp = PETSC_FALSE;
308     chshift = PETSC_FALSE;
309     for (i=0; i<mbs; i++) {
310       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
311     }
312 
313     for (k = 0; k<mbs; k++){
314       bval = ba + bi[k];
315       /* initialize k-th row by the perm[k]-th row of A */
316       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
317       for (j = jmin; j < jmax; j++){
318         col = rip[aj[j]];
319         if (col >= k){ /* only take upper triangular entry */
320           rtmp[col] = aa[j];
321           *bval++  = 0.0; /* for in-place factorization */
322         }
323       }
324       /* damp the diagonal of the matrix */
325       if (ndamp||nshift) rtmp[k] += damping+shift_amount;
326 
327       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
328       dk = rtmp[k];
329       i = jl[k]; /* first row to be added to k_th row  */
330 
331       while (i < k){
332         nexti = jl[i]; /* next row to be added to k_th row */
333 
334         /* compute multiplier, update diag(k) and U(i,k) */
335         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
336         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
337         dk += uikdi*ba[ili];
338         ba[ili] = uikdi; /* -U(i,k) */
339 
340         /* add multiple of row i to k-th row */
341         jmin = ili + 1; jmax = bi[i+1];
342         if (jmin < jmax){
343           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
344           /* update il and jl for row i */
345           il[i] = jmin;
346           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
347         }
348         i = nexti;
349       }
350 
351       if (PetscRealPart(dk) < zeropivot && b->factor_shift){
352 	/* calculate a shift that would make this row diagonally dominant */
353 	PetscReal rs = PetscAbs(PetscRealPart(dk));
354 	jmin      = bi[k]+1;
355 	nz        = bi[k+1] - jmin;
356 	if (nz){
357 	  bcol = bj + jmin;
358 	  bval = ba + jmin;
359 	  while (nz--){
360 	    rs += PetscAbsScalar(rtmp[*bcol++]);
361 	  }
362 	}
363 	/* if this shift is less than the previous, just up the previous
364 	   one by a bit */
365 	shift_amount = PetscMax(rs,1.1*shift_amount);
366 	chshift  = PETSC_TRUE;
367 	/* Unlike in the ILU case there is no exit condition on nshift:
368 	   we increase the shift until it converges. There is no guarantee that
369 	   this algorithm converges faster or slower, or is better or worse
370 	   than the ILU algorithm. */
371 	nshift++;
372 	break;
373       }
374       if (PetscRealPart(dk) < zeropivot){
375         if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1);
376         if (damping > 0.0) {
377           if (ndamp) damping *= 2.0;
378           damp = PETSC_TRUE;
379           ndamp++;
380           break;
381         } else if (PetscAbsScalar(dk) < zeropivot){
382           SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot);
383         } else {
384           PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k);
385         }
386       }
387 
388       /* copy data into U(k,:) */
389       ba[bi[k]] = 1.0/dk; /* U(k,k) */
390       jmin = bi[k]+1; jmax = bi[k+1];
391       if (jmin < jmax) {
392         for (j=jmin; j<jmax; j++){
393           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
394         }
395         /* add the k-th row into il and jl */
396         il[k] = jmin;
397         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
398       }
399     }
400   } while (damp||chshift);
401   ierr = PetscFree(il);CHKERRQ(ierr);
402 
403   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
404   C->factor       = FACTOR_CHOLESKY;
405   C->assembled    = PETSC_TRUE;
406   C->preallocated = PETSC_TRUE;
407   PetscLogFlops(C->m);
408   if (ndamp) {
409     PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqBAIJ: number of damping tries %D damping value %g\n",ndamp,damping);
410   }
411   if (nshift) {
412     PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqBAIJ diagonal shifted %D shifts\n",nshift);
413   }
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNCT__
418 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering"
419 PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat A,Mat *fact)
420 {
421   Mat            C = *fact;
422   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
423   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
424   PetscErrorCode ierr;
425   PetscInt       i,j,am=a->mbs,bs=A->bs;
426   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
427   PetscInt       k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0;
428   MatScalar      *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
429   PetscReal      damping=b->factor_damping, zeropivot=b->factor_zeropivot,shift_amount;
430   PetscTruth     damp,chshift;
431   PetscInt       nshift=0;
432 
433   PetscFunctionBegin;
434   if (bs > 1) {
435     SETERRQ(PETSC_ERR_USER,"not done yet");
436   }
437 
438   /* initialization */
439   nz   = (2*am+1)*sizeof(PetscInt)+am*sizeof(MatScalar);
440   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
441   jl   = il + am;
442   rtmp = (MatScalar*)(jl + am);
443 
444   shift_amount = 0;
445   do {
446     damp = PETSC_FALSE;
447     chshift = PETSC_FALSE;
448     for (i=0; i<am; i++) {
449       rtmp[i] = 0.0; jl[i] = am; il[0] = 0;
450     }
451 
452     for (k = 0; k<am; k++){
453     /* initialize k-th row with elements nonzero in row perm(k) of A */
454       nz   = ai[k+1] - ai[k];
455       acol = aj + ai[k];
456       aval = aa + ai[k];
457       bval = ba + bi[k];
458       while (nz -- ){
459         if (*acol < k) { /* skip lower triangular entries */
460           acol++; aval++;
461         } else {
462           rtmp[*acol++] = *aval++;
463           *bval++       = 0.0; /* for in-place factorization */
464         }
465       }
466       /* damp the diagonal of the matrix */
467       if (ndamp||nshift) rtmp[k] += damping+shift_amount;
468 
469       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
470       dk = rtmp[k];
471       i  = jl[k]; /* first row to be added to k_th row  */
472 
473       while (i < k){
474         nexti = jl[i]; /* next row to be added to k_th row */
475         /* compute multiplier, update D(k) and U(i,k) */
476         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
477         uikdi = - ba[ili]*ba[bi[i]];
478         dk   += uikdi*ba[ili];
479         ba[ili] = uikdi; /* -U(i,k) */
480 
481         /* add multiple of row i to k-th row ... */
482         jmin = ili + 1;
483         nz   = bi[i+1] - jmin;
484         if (nz > 0){
485           bcol = bj + jmin;
486           bval = ba + jmin;
487           while (nz --) rtmp[*bcol++] += uikdi*(*bval++);
488           /* update il and jl for i-th row */
489           il[i] = jmin;
490           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
491         }
492         i = nexti;
493       }
494 
495       if (PetscRealPart(dk) < zeropivot && b->factor_shift){
496 	/* calculate a shift that would make this row diagonally dominant */
497 	PetscReal rs = PetscAbs(PetscRealPart(dk));
498 	jmin      = bi[k]+1;
499 	nz        = bi[k+1] - jmin;
500 	if (nz){
501 	  bcol = bj + jmin;
502 	  bval = ba + jmin;
503 	  while (nz--){
504 	    rs += PetscAbsScalar(rtmp[*bcol++]);
505 	  }
506 	}
507 	/* if this shift is less than the previous, just up the previous
508 	   one by a bit */
509 	shift_amount = PetscMax(rs,1.1*shift_amount);
510 	chshift  = PETSC_TRUE;
511 	/* Unlike in the ILU case there is no exit condition on nshift:
512 	   we increase the shift until it converges. There is no guarantee that
513 	   this algorithm converges faster or slower, or is better or worse
514 	   than the ILU algorithm. */
515 	nshift++;
516 	break;
517       }
518       if (PetscRealPart(dk) < zeropivot){
519         if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1);
520         if (damping > 0.0) {
521           if (ndamp) damping *= 2.0;
522           damp = PETSC_TRUE;
523           ndamp++;
524           break;
525         } else if (PetscAbsScalar(dk) < zeropivot){
526           SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot);
527         } else {
528           PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k);
529         }
530       }
531 
532       /* copy data into U(k,:) */
533       ba[bi[k]] = 1.0/dk;
534       jmin      = bi[k]+1;
535       nz        = bi[k+1] - jmin;
536       if (nz){
537         bcol = bj + jmin;
538         bval = ba + jmin;
539         while (nz--){
540           *bval++       = rtmp[*bcol];
541           rtmp[*bcol++] = 0.0;
542         }
543         /* add k-th row into il and jl */
544         il[k] = jmin;
545         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
546       }
547     }
548   } while (damp||chshift);
549   ierr = PetscFree(il);CHKERRQ(ierr);
550 
551   C->factor       = FACTOR_CHOLESKY;
552   C->assembled    = PETSC_TRUE;
553   C->preallocated = PETSC_TRUE;
554   PetscLogFlops(C->m);
555   if (ndamp) {
556     PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqBAIJ_1_NaturalOrdering: number of damping tries %D damping value %g\n",ndamp,damping);
557   }
558   if (nshift) {
559     PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqBAIJ_1_NaturalOrdering diagonal shifted %D shifts\n",nshift);
560   }
561   PetscFunctionReturn(0);
562 }
563 
564 #include "petscbt.h"
565 #include "src/mat/utils/freespace.h"
566 #undef __FUNCT__
567 #define __FUNCT__ "MatICCFactorSymbolic_SeqBAIJ"
568 PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
569 {
570   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
571   Mat_SeqSBAIJ   *b;
572   Mat            B;
573   PetscErrorCode ierr;
574   PetscTruth     perm_identity;
575   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=a->mbs,bs=A->bs,*ui;
576   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
577   PetscInt       nlnk,*lnk,*lnk_lvl,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
578   PetscReal      fill=info->fill,levels=info->levels;
579   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
580   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
581   PetscBT        lnkbt;
582 
583   PetscFunctionBegin;
584   if (bs > 1){
585     if (!a->sbaijMat){
586       ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr);
587     }
588     ierr = MatICCFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr);
589     B = *fact;
590     ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr);
591     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
592     PetscFunctionReturn(0);
593   }
594 
595   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
596   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
597 
598   /* special case that simply copies fill pattern */
599   if (!levels && perm_identity) {
600     ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
601     ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
602     for (i=0; i<am; i++) {
603       ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */
604     }
605     ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
606     B = *fact;
607     ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
608     ierr = MatSeqSBAIJSetPreallocation(B,1,0,ui);CHKERRQ(ierr);
609 
610     b  = (Mat_SeqSBAIJ*)B->data;
611     uj = b->j;
612     for (i=0; i<am; i++) {
613       aj = a->j + a->diag[i];
614       for (j=0; j<ui[i]; j++){
615         *uj++ = *aj++;
616       }
617       b->ilen[i] = ui[i];
618     }
619     ierr = PetscFree(ui);CHKERRQ(ierr);
620     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
621     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
622 
623     B->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
624     B->ops->solvetranspose        = MatSolve_SeqSBAIJ_1_NaturalOrdering;
625     ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr);
626     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
627     PetscFunctionReturn(0);
628   }
629 
630   /* initialization */
631   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
632   ui[0] = 0;
633   ierr  = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr);
634 
635   /* jl: linked list for storing indices of the pivot rows
636      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
637   ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
638   il         = jl + am;
639   uj_ptr     = (PetscInt**)(il + am);
640   uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
641   for (i=0; i<am; i++){
642     jl[i] = am; il[i] = 0;
643   }
644 
645   /* create and initialize a linked list for storing column indices of the active row k */
646   nlnk = am + 1;
647   ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
648 
649   /* initial FreeSpace size is fill*(ai[am]+1) */
650   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
651   current_space = free_space;
652   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
653   current_space_lvl = free_space_lvl;
654 
655   for (k=0; k<am; k++){  /* for each active row k */
656     /* initialize lnk by the column indices of row rip[k] of A */
657     nzk   = 0;
658     ncols = ai[rip[k]+1] - ai[rip[k]];
659     ncols_upper = 0;
660     cols        = cols_lvl + am;
661     for (j=0; j<ncols; j++){
662       i = rip[*(aj + ai[rip[k]] + j)];
663       if (i >= k){ /* only take upper triangular entry */
664         cols[ncols_upper] = i;
665         cols_lvl[ncols_upper] = -1;  /* initialize level for nonzero entries */
666         ncols_upper++;
667       }
668     }
669     ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
670     nzk += nlnk;
671 
672     /* update lnk by computing fill-in for each pivot row to be merged in */
673     prow = jl[k]; /* 1st pivot row */
674 
675     while (prow < k){
676       nextprow = jl[prow];
677 
678       /* merge prow into k-th row */
679       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
680       jmax = ui[prow+1];
681       ncols = jmax-jmin;
682       i     = jmin - ui[prow];
683       cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
684       for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j);
685       ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
686       nzk += nlnk;
687 
688       /* update il and jl for prow */
689       if (jmin < jmax){
690         il[prow] = jmin;
691         j = *cols; jl[prow] = jl[j]; jl[j] = prow;
692       }
693       prow = nextprow;
694     }
695 
696     /* if free space is not available, make more free space */
697     if (current_space->local_remaining<nzk) {
698       i = am - k + 1; /* num of unfactored rows */
699       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
700       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
701       ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
702       reallocs++;
703     }
704 
705     /* copy data into free_space and free_space_lvl, then initialize lnk */
706     ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
707 
708     /* add the k-th row into il and jl */
709     if (nzk-1 > 0){
710       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
711       jl[k] = jl[i]; jl[i] = k;
712       il[k] = ui[k] + 1;
713     }
714     uj_ptr[k]     = current_space->array;
715     uj_lvl_ptr[k] = current_space_lvl->array;
716 
717     current_space->array           += nzk;
718     current_space->local_used      += nzk;
719     current_space->local_remaining -= nzk;
720 
721     current_space_lvl->array           += nzk;
722     current_space_lvl->local_used      += nzk;
723     current_space_lvl->local_remaining -= nzk;
724 
725     ui[k+1] = ui[k] + nzk;
726   }
727 
728   if (ai[am] != 0) {
729     PetscReal af = ((PetscReal)(2*ui[am]-am))/((PetscReal)ai[am]);
730     PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
731     PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
732     PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
733   } else {
734      PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:Empty matrix.\n");
735   }
736 
737   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
738   ierr = PetscFree(jl);CHKERRQ(ierr);
739   ierr = PetscFree(cols_lvl);CHKERRQ(ierr);
740 
741   /* destroy list of free space and other temporary array(s) */
742   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
743   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
744   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
745   ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
746 
747   /* put together the new matrix in MATSEQSBAIJ format */
748   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
749   B = *fact;
750   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
751   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
752 
753   b = (Mat_SeqSBAIJ*)B->data;
754   ierr = PetscFree(b->imax);CHKERRQ(ierr);
755   b->singlemalloc = PETSC_FALSE;
756   /* the next line frees the default space generated by the Create() */
757   ierr = PetscFree(b->a);CHKERRQ(ierr);
758   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
759   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
760   b->j    = uj;
761   b->i    = ui;
762   b->diag = 0;
763   b->ilen = 0;
764   b->imax = 0;
765   b->row  = perm;
766   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
767   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
768   b->icol = perm;
769   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
770   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
771   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
772   b->maxnz = b->nz = ui[am];
773 
774   B->factor                 = FACTOR_CHOLESKY;
775   B->info.factor_mallocs    = reallocs;
776   B->info.fill_ratio_given  = fill;
777   if (ai[am] != 0) {
778     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
779   } else {
780     B->info.fill_ratio_needed = 0.0;
781   }
782   if (perm_identity){
783     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
784     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
785     ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr);
786     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
787   } else {
788     (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
789   }
790   PetscFunctionReturn(0);
791 }
792 
793 #undef __FUNCT__
794 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqBAIJ"
795 PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
796 {
797   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
798   Mat_SeqSBAIJ   *b;
799   Mat            B;
800   PetscErrorCode ierr;
801   PetscTruth     perm_identity;
802   PetscReal      fill = info->fill;
803   PetscInt       *rip,*riip,i,mbs=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j,reallocs=0,prow;
804   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
805   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
806   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
807   PetscBT        lnkbt;
808   IS             iperm;
809 
810   PetscFunctionBegin;
811   if (bs > 1) { /* convert to seqsbaij */
812     if (!a->sbaijMat){
813       ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr);
814     }
815     ierr = MatCholeskyFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr);
816     B = *fact;
817     ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr);
818     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
819     PetscFunctionReturn(0);
820   }
821 
822   /* check whether perm is the identity mapping */
823   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
824   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
825 
826   if (!perm_identity){
827     /* check if perm is symmetric! */
828     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
829     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
830     for (i=0; i<mbs; i++) {
831       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
832     }
833     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
834     ierr = ISDestroy(iperm);CHKERRQ(ierr);
835   }
836 
837   /* initialization */
838   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
839   ui[0] = 0;
840 
841   /* jl: linked list for storing indices of the pivot rows
842      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
843   ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
844   il     = jl + mbs;
845   cols   = il + mbs;
846   ui_ptr = (PetscInt**)(cols + mbs);
847   for (i=0; i<mbs; i++){
848     jl[i] = mbs; il[i] = 0;
849   }
850 
851   /* create and initialize a linked list for storing column indices of the active row k */
852   nlnk = mbs + 1;
853   ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
854 
855   /* initial FreeSpace size is fill*(ai[mbs]+1) */
856   ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr);
857   current_space = free_space;
858 
859   for (k=0; k<mbs; k++){  /* for each active row k */
860     /* initialize lnk by the column indices of row rip[k] of A */
861     nzk   = 0;
862     ncols = ai[rip[k]+1] - ai[rip[k]];
863     ncols_upper = 0;
864     for (j=0; j<ncols; j++){
865       i = rip[*(aj + ai[rip[k]] + j)];
866       if (i >= k){ /* only take upper triangular entry */
867         cols[ncols_upper] = i;
868         ncols_upper++;
869       }
870     }
871     ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
872     nzk += nlnk;
873 
874     /* update lnk by computing fill-in for each pivot row to be merged in */
875     prow = jl[k]; /* 1st pivot row */
876 
877     while (prow < k){
878       nextprow = jl[prow];
879       /* merge prow into k-th row */
880       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
881       jmax = ui[prow+1];
882       ncols = jmax-jmin;
883       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
884       ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
885       nzk += nlnk;
886 
887       /* update il and jl for prow */
888       if (jmin < jmax){
889         il[prow] = jmin;
890         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
891       }
892       prow = nextprow;
893     }
894 
895     /* if free space is not available, make more free space */
896     if (current_space->local_remaining<nzk) {
897       i = mbs - k + 1; /* num of unfactored rows */
898       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
899       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
900       reallocs++;
901     }
902 
903     /* copy data into free space, then initialize lnk */
904     ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
905 
906     /* add the k-th row into il and jl */
907     if (nzk-1 > 0){
908       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
909       jl[k] = jl[i]; jl[i] = k;
910       il[k] = ui[k] + 1;
911     }
912     ui_ptr[k] = current_space->array;
913     current_space->array           += nzk;
914     current_space->local_used      += nzk;
915     current_space->local_remaining -= nzk;
916 
917     ui[k+1] = ui[k] + nzk;
918   }
919 
920   if (ai[mbs] != 0) {
921     PetscReal af = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
922     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
923     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
924     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
925   } else {
926      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
927   }
928 
929   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
930   ierr = PetscFree(jl);CHKERRQ(ierr);
931 
932   /* destroy list of free space and other temporary array(s) */
933   ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
934   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
935   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
936 
937   /* put together the new matrix in MATSEQSBAIJ format */
938   ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr);
939   B    = *fact;
940   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
941   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,PETSC_NULL);CHKERRQ(ierr);
942 
943   b = (Mat_SeqSBAIJ*)B->data;
944   ierr = PetscFree(b->imax);CHKERRQ(ierr);
945   b->singlemalloc = PETSC_FALSE;
946   /* the next line frees the default space generated by the Create() */
947   ierr = PetscFree(b->a);CHKERRQ(ierr);
948   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
949   ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
950   b->j    = uj;
951   b->i    = ui;
952   b->diag = 0;
953   b->ilen = 0;
954   b->imax = 0;
955   b->row  = perm;
956   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
957   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
958   b->icol = perm;
959   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
960   ierr    = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
961   PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
962   b->maxnz = b->nz = ui[mbs];
963 
964   B->factor                 = FACTOR_CHOLESKY;
965   B->info.factor_mallocs    = reallocs;
966   B->info.fill_ratio_given  = fill;
967   if (ai[mbs] != 0) {
968     B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
969   } else {
970     B->info.fill_ratio_needed = 0.0;
971   }
972   if (perm_identity){
973     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
974     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
975     ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr);
976     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
977   } else {
978     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
979   }
980   PetscFunctionReturn(0);
981 }
982