xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact.c (revision 5f9f512d1b4e849c2c79f8a3b09658981300aaaf)
1 /*$Id: sbaijfact.c,v 1.61 2001/08/06 21:15:47 bsmith Exp $*/
2 
3 #include "src/mat/impls/baij/seq/baij.h"
4 #include "src/mat/impls/sbaij/seq/sbaij.h"
5 #include "src/vec/vecimpl.h"
6 #include "src/inline/ilu.h"
7 #include "include/petscis.h"
8 
9 /*
10   input:
11    A -- original matrix in SEQSBAIJ format
12    F -- symbolic or numeric factor of A
13   output:
14    F -- numeric factor of A if F is input as symbolic factor of A
15    nneg, nzero, npos: inertia of A
16 */
17 
18 #undef __FUNCT__
19 #define __FUNCT__ "MatGetInertia_SeqSBAIJ"
20 int MatGetInertia_SeqSBAIJ(Mat A,Mat *F,int *nneig,int *nzero,int *npos)
21 {
22   PetscScalar  *dd;
23   Mat_SeqSBAIJ *fact_ptr;
24   int          ierr,m=A->m,i;
25 
26   PetscFunctionBegin;
27   if (! (*F)->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for symfactor yet, use numfactor F");
28 
29   fact_ptr = (Mat_SeqSBAIJ*)(*F)->data;
30   dd       = fact_ptr->a;
31   *nneig = 0;
32   for (i=0; i<m; i++){
33     if (dd[i] < 0.0) (*nneig)++;
34   }
35 
36   PetscFunctionReturn(0);
37 }
38 
39 /* Using Modified Sparse Row (MSR) storage.
40 See page 85, "Iterative Methods ..." by Saad. */
41 /*
42     Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
43 */
44 /* Use Modified Sparse Row storage for u and ju, see Sasd pp.85 */
45 #undef __FUNCT__
46 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ"
47 int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,PetscReal f,Mat *B)
48 {
49   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b;
50   int          *rip,ierr,i,mbs = a->mbs,*ai,*aj;
51   int          *jutmp,bs = a->bs,bs2=a->bs2;
52   int          m,realloc = 0,prow;
53   int          *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
54   PetscTruth   perm_identity;
55 
56   PetscFunctionBegin;
57 
58   /* check whether perm is the identity mapping */
59   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
60   if (!perm_identity) a->permute = PETSC_TRUE;
61 
62   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
63 
64   if (perm_identity){ /* without permutation */
65     ai = a->i; aj = a->j;
66   } else {            /* non-trivial permutation */
67     ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr);
68     ai = a->inew; aj = a->jnew;
69   }
70 
71   /* initialization */
72   ierr  = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr);
73   umax  = (int)(f*ai[mbs] + 1); umax += mbs + 1;
74   ierr  = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr);
75   iu[0] = mbs+1;
76   juidx = mbs + 1; /* index for ju */
77   ierr  = PetscMalloc(2*mbs*sizeof(int),&jl);CHKERRQ(ierr); /* linked list for pivot row */
78   q     = jl + mbs;   /* linked list for col index */
79   for (i=0; i<mbs; i++){
80     jl[i] = mbs;
81     q[i] = 0;
82   }
83 
84   /* for each row k */
85   for (k=0; k<mbs; k++){
86     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
87     q[k] = mbs;
88     /* initialize nonzero structure of k-th row to row rip[k] of A */
89     jmin = ai[rip[k]];
90     jmax = ai[rip[k]+1];
91     for (j=jmin; j<jmax; j++){
92       vj = rip[aj[j]]; /* col. value */
93       if(vj > k){
94         qm = k;
95         do {
96           m  = qm; qm = q[m];
97         } while(qm < vj);
98         if (qm == vj) {
99           SETERRQ(1," error: duplicate entry in A\n");
100         }
101         nzk++;
102         q[m]  = vj;
103         q[vj] = qm;
104       } /* if(vj > k) */
105     } /* for (j=jmin; j<jmax; j++) */
106 
107     /* modify nonzero structure of k-th row by computing fill-in
108        for each row i to be merged in */
109     prow = k;
110     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
111 
112     while (prow < k){
113       /* merge row prow into k-th row */
114       jmin = iu[prow] + 1; jmax = iu[prow+1];
115       qm = k;
116       for (j=jmin; j<jmax; j++){
117         vj = ju[j];
118         do {
119           m = qm; qm = q[m];
120         } while (qm < vj);
121         if (qm != vj){
122          nzk++; q[m] = vj; q[vj] = qm; qm = vj;
123         }
124       }
125       prow = jl[prow]; /* next pivot row */
126     }
127 
128     /* add k to row list for first nonzero element in k-th row */
129     if (nzk > 0){
130       i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
131       jl[k] = jl[i]; jl[i] = k;
132     }
133     iu[k+1] = iu[k] + nzk;
134 
135     /* allocate more space to ju if needed */
136     if (iu[k+1] > umax) {
137       /* estimate how much additional space we will need */
138       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
139       /* just double the memory each time */
140       maxadd = umax;
141       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
142       umax += maxadd;
143 
144       /* allocate a longer ju */
145       ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr);
146       ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr);
147       ierr = PetscFree(ju);CHKERRQ(ierr);
148       ju   = jutmp;
149       realloc++; /* count how many times we realloc */
150     }
151 
152     /* save nonzero structure of k-th row in ju */
153     i=k;
154     while (nzk --) {
155       i           = q[i];
156       ju[juidx++] = i;
157     }
158   }
159 
160   if (ai[mbs] != 0) {
161     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
162     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
163     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
164     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af);
165     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n");
166   } else {
167      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
168   }
169 
170   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
171   /* ierr = PetscFree(q);CHKERRQ(ierr); */
172   ierr = PetscFree(jl);CHKERRQ(ierr);
173 
174   /* put together the new matrix */
175   ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr);
176   /* PetscLogObjectParent(*B,iperm); */
177   b = (Mat_SeqSBAIJ*)(*B)->data;
178   ierr = PetscFree(b->imax);CHKERRQ(ierr);
179   b->singlemalloc = PETSC_FALSE;
180   /* the next line frees the default space generated by the Create() */
181   ierr = PetscFree(b->a);CHKERRQ(ierr);
182   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
183   ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
184   b->j    = ju;
185   b->i    = iu;
186   b->diag = 0;
187   b->ilen = 0;
188   b->imax = 0;
189   b->row  = perm;
190   b->pivotinblocks = PETSC_FALSE; /* need to get from MatCholeskyInfo */
191   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
192   b->icol = perm;
193   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
194   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
195   /* In b structure:  Free imax, ilen, old a, old j.
196      Allocate idnew, solve_work, new a, new j */
197   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
198   b->s_maxnz = b->s_nz = iu[mbs];
199 
200   (*B)->factor                 = FACTOR_CHOLESKY;
201   (*B)->info.factor_mallocs    = realloc;
202   (*B)->info.fill_ratio_given  = f;
203   if (ai[mbs] != 0) {
204     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
205   } else {
206     (*B)->info.fill_ratio_needed = 0.0;
207   }
208 
209   if (perm_identity){
210     switch (bs) {
211       case 1:
212         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
213         (*B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
214         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
215         break;
216       case 2:
217         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
218         (*B)->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
219         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
220         break;
221       case 3:
222         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
223         (*B)->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
224         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n");
225         break;
226       case 4:
227         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
228         (*B)->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
229         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
230         break;
231       case 5:
232         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
233         (*B)->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
234         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
235         break;
236       case 6:
237         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
238         (*B)->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
239         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
240         break;
241       case 7:
242         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
243         (*B)->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
244         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
245       break;
246       default:
247         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
248         (*B)->ops->solve           = MatSolve_SeqSBAIJ_N_NaturalOrdering;
249         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n");
250       break;
251     }
252   }
253 
254   PetscFunctionReturn(0);
255 }
256 
257 
258 #undef __FUNCT__
259 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N"
260 int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B)
261 {
262   Mat                C = *B;
263   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
264   IS                 perm = b->row;
265   int                *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
266   int                *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
267   int                bs=a->bs,bs2 = a->bs2;
268   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
269   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
270   MatScalar          *work;
271   int                *pivots;
272 
273   PetscFunctionBegin;
274 
275   /* initialization */
276   ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
277   ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr);
278   ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr);
279   jl   = il + mbs;
280   for (i=0; i<mbs; i++) {
281     jl[i] = mbs; il[0] = 0;
282   }
283   ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr);
284   uik  = dk + bs2;
285   work = uik + bs2;
286   ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr);
287 
288   ierr  = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr);
289 
290   /* check permutation */
291   if (!a->permute){
292     ai = a->i; aj = a->j; aa = a->a;
293   } else {
294     ai   = a->inew; aj = a->jnew;
295     ierr = PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr);
296     ierr = PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr);
297     ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr);
298     ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr);
299 
300     for (i=0; i<mbs; i++){
301       jmin = ai[i]; jmax = ai[i+1];
302       for (j=jmin; j<jmax; j++){
303         while (a2anew[j] != j){
304           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
305           for (k1=0; k1<bs2; k1++){
306             dk[k1]       = aa[k*bs2+k1];
307             aa[k*bs2+k1] = aa[j*bs2+k1];
308             aa[j*bs2+k1] = dk[k1];
309           }
310         }
311         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
312         if (i > aj[j]){
313           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
314           ap = aa + j*bs2;                     /* ptr to the beginning of j-th block of aa */
315           for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
316           for (k=0; k<bs; k++){               /* j-th block of aa <- dk^T */
317             for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1];
318           }
319         }
320       }
321     }
322     ierr = PetscFree(a2anew);CHKERRQ(ierr);
323   }
324 
325   /* for each row k */
326   for (k = 0; k<mbs; k++){
327 
328     /*initialize k-th row with elements nonzero in row perm(k) of A */
329     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
330 
331     ap = aa + jmin*bs2;
332     for (j = jmin; j < jmax; j++){
333       vj = perm_ptr[aj[j]];         /* block col. index */
334       rtmp_ptr = rtmp + vj*bs2;
335       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
336     }
337 
338     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
339     ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
340     i = jl[k]; /* first row to be added to k_th row  */
341 
342     while (i < k){
343       nexti = jl[i]; /* next row to be added to k_th row */
344 
345       /* compute multiplier */
346       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
347 
348       /* uik = -inv(Di)*U_bar(i,k) */
349       diag = ba + i*bs2;
350       u    = ba + ili*bs2;
351       ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr);
352       Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
353 
354       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
355       Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
356 
357       /* update -U(i,k) */
358       ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr);
359 
360       /* add multiple of row i to k-th row ... */
361       jmin = ili + 1; jmax = bi[i+1];
362       if (jmin < jmax){
363         for (j=jmin; j<jmax; j++) {
364           /* rtmp += -U(i,k)^T * U_bar(i,j) */
365           rtmp_ptr = rtmp + bj[j]*bs2;
366           u = ba + j*bs2;
367           Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
368         }
369 
370         /* ... add i to row list for next nonzero entry */
371         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
372         j     = bj[jmin];
373         jl[i] = jl[j]; jl[j] = i; /* update jl */
374       }
375       i = nexti;
376     }
377 
378     /* save nonzero entries in k-th row of U ... */
379 
380     /* invert diagonal block */
381     diag = ba+k*bs2;
382     ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr);
383     Kernel_A_gets_inverse_A(bs,diag,pivots,work);
384 
385     jmin = bi[k]; jmax = bi[k+1];
386     if (jmin < jmax) {
387       for (j=jmin; j<jmax; j++){
388          vj = bj[j];           /* block col. index of U */
389          u   = ba + j*bs2;
390          rtmp_ptr = rtmp + vj*bs2;
391          for (k1=0; k1<bs2; k1++){
392            *u++        = *rtmp_ptr;
393            *rtmp_ptr++ = 0.0;
394          }
395       }
396 
397       /* ... add k to row list for first nonzero entry in k-th row */
398       il[k] = jmin;
399       i     = bj[jmin];
400       jl[k] = jl[i]; jl[i] = k;
401     }
402   }
403 
404   ierr = PetscFree(rtmp);CHKERRQ(ierr);
405   ierr = PetscFree(il);CHKERRQ(ierr);
406   ierr = PetscFree(dk);CHKERRQ(ierr);
407   ierr = PetscFree(pivots);CHKERRQ(ierr);
408   if (a->permute){
409     ierr = PetscFree(aa);CHKERRQ(ierr);
410   }
411 
412   ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr);
413   C->factor    = FACTOR_CHOLESKY;
414   C->assembled = PETSC_TRUE;
415   C->preallocated = PETSC_TRUE;
416   PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
417   PetscFunctionReturn(0);
418 }
419 
420 #undef __FUNCT__
421 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering"
422 int MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B)
423 {
424   Mat                C = *B;
425   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
426   int                ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
427   int                *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
428   int                bs=a->bs,bs2 = a->bs2;
429   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
430   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
431   MatScalar          *work;
432   int                *pivots;
433 
434   PetscFunctionBegin;
435 
436   /* initialization */
437 
438   ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
439   ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr);
440   ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr);
441   jl   = il + mbs;
442   for (i=0; i<mbs; i++) {
443     jl[i] = mbs; il[0] = 0;
444   }
445   ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr);
446   uik  = dk + bs2;
447   work = uik + bs2;
448   ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr);
449 
450   ai = a->i; aj = a->j; aa = a->a;
451 
452   /* for each row k */
453   for (k = 0; k<mbs; k++){
454 
455     /*initialize k-th row with elements nonzero in row k of A */
456     jmin = ai[k]; jmax = ai[k+1];
457     ap = aa + jmin*bs2;
458     for (j = jmin; j < jmax; j++){
459       vj = aj[j];         /* block col. index */
460       rtmp_ptr = rtmp + vj*bs2;
461       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
462     }
463 
464     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
465     ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
466     i = jl[k]; /* first row to be added to k_th row  */
467 
468     while (i < k){
469       nexti = jl[i]; /* next row to be added to k_th row */
470 
471       /* compute multiplier */
472       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
473 
474       /* uik = -inv(Di)*U_bar(i,k) */
475       diag = ba + i*bs2;
476       u    = ba + ili*bs2;
477       ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr);
478       Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
479 
480       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
481       Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
482 
483       /* update -U(i,k) */
484       ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr);
485 
486       /* add multiple of row i to k-th row ... */
487       jmin = ili + 1; jmax = bi[i+1];
488       if (jmin < jmax){
489         for (j=jmin; j<jmax; j++) {
490           /* rtmp += -U(i,k)^T * U_bar(i,j) */
491           rtmp_ptr = rtmp + bj[j]*bs2;
492           u = ba + j*bs2;
493           Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
494         }
495 
496         /* ... add i to row list for next nonzero entry */
497         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
498         j     = bj[jmin];
499         jl[i] = jl[j]; jl[j] = i; /* update jl */
500       }
501       i = nexti;
502     }
503 
504     /* save nonzero entries in k-th row of U ... */
505 
506     /* invert diagonal block */
507     diag = ba+k*bs2;
508     ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr);
509     Kernel_A_gets_inverse_A(bs,diag,pivots,work);
510 
511     jmin = bi[k]; jmax = bi[k+1];
512     if (jmin < jmax) {
513       for (j=jmin; j<jmax; j++){
514          vj = bj[j];           /* block col. index of U */
515          u   = ba + j*bs2;
516          rtmp_ptr = rtmp + vj*bs2;
517          for (k1=0; k1<bs2; k1++){
518            *u++        = *rtmp_ptr;
519            *rtmp_ptr++ = 0.0;
520          }
521       }
522 
523       /* ... add k to row list for first nonzero entry in k-th row */
524       il[k] = jmin;
525       i     = bj[jmin];
526       jl[k] = jl[i]; jl[i] = k;
527     }
528   }
529 
530   ierr = PetscFree(rtmp);CHKERRQ(ierr);
531   ierr = PetscFree(il);CHKERRQ(ierr);
532   ierr = PetscFree(dk);CHKERRQ(ierr);
533   ierr = PetscFree(pivots);CHKERRQ(ierr);
534 
535   C->factor    = FACTOR_CHOLESKY;
536   C->assembled = PETSC_TRUE;
537   C->preallocated = PETSC_TRUE;
538   PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
539   PetscFunctionReturn(0);
540 }
541 
542 /*
543     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
544     Version for blocks 2 by 2.
545 */
546 #undef __FUNCT__
547 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2"
548 int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B)
549 {
550   Mat                C = *B;
551   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
552   IS                 perm = b->row;
553   int                *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
554   int                *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
555   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
556   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
557 
558   PetscFunctionBegin;
559 
560   /* initialization */
561   /* il and jl record the first nonzero element in each row of the accessing
562      window U(0:k, k:mbs-1).
563      jl:    list of rows to be added to uneliminated rows
564             i>= k: jl(i) is the first row to be added to row i
565             i<  k: jl(i) is the row following row i in some list of rows
566             jl(i) = mbs indicates the end of a list
567      il(i): points to the first nonzero element in columns k,...,mbs-1 of
568             row i of U */
569   ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
570   ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr);
571   ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr);
572   jl   = il + mbs;
573   for (i=0; i<mbs; i++) {
574     jl[i] = mbs; il[0] = 0;
575   }
576   ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr);
577   uik  = dk + 4;
578   ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr);
579 
580   /* check permutation */
581   if (!a->permute){
582     ai = a->i; aj = a->j; aa = a->a;
583   } else {
584     ai   = a->inew; aj = a->jnew;
585     ierr = PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr);
586     ierr = PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr);
587     ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr);
588     ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr);
589 
590     for (i=0; i<mbs; i++){
591       jmin = ai[i]; jmax = ai[i+1];
592       for (j=jmin; j<jmax; j++){
593         while (a2anew[j] != j){
594           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
595           for (k1=0; k1<4; k1++){
596             dk[k1]       = aa[k*4+k1];
597             aa[k*4+k1] = aa[j*4+k1];
598             aa[j*4+k1] = dk[k1];
599           }
600         }
601         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
602         if (i > aj[j]){
603           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
604           ap = aa + j*4;     /* ptr to the beginning of the block */
605           dk[1] = ap[1];     /* swap ap[1] and ap[2] */
606           ap[1] = ap[2];
607           ap[2] = dk[1];
608         }
609       }
610     }
611     ierr = PetscFree(a2anew);CHKERRQ(ierr);
612   }
613 
614   /* for each row k */
615   for (k = 0; k<mbs; k++){
616 
617     /*initialize k-th row with elements nonzero in row perm(k) of A */
618     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
619     ap = aa + jmin*4;
620     for (j = jmin; j < jmax; j++){
621       vj = perm_ptr[aj[j]];         /* block col. index */
622       rtmp_ptr = rtmp + vj*4;
623       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
624     }
625 
626     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
627     ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr);
628     i = jl[k]; /* first row to be added to k_th row  */
629 
630     while (i < k){
631       nexti = jl[i]; /* next row to be added to k_th row */
632 
633       /* compute multiplier */
634       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
635 
636       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
637       diag = ba + i*4;
638       u    = ba + ili*4;
639       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
640       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
641       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
642       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
643 
644       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
645       dk[0] += uik[0]*u[0] + uik[1]*u[1];
646       dk[1] += uik[2]*u[0] + uik[3]*u[1];
647       dk[2] += uik[0]*u[2] + uik[1]*u[3];
648       dk[3] += uik[2]*u[2] + uik[3]*u[3];
649 
650       /* update -U(i,k): ba[ili] = uik */
651       ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr);
652 
653       /* add multiple of row i to k-th row ... */
654       jmin = ili + 1; jmax = bi[i+1];
655       if (jmin < jmax){
656         for (j=jmin; j<jmax; j++) {
657           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
658           rtmp_ptr = rtmp + bj[j]*4;
659           u = ba + j*4;
660           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
661           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
662           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
663           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
664         }
665 
666         /* ... add i to row list for next nonzero entry */
667         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
668         j     = bj[jmin];
669         jl[i] = jl[j]; jl[j] = i; /* update jl */
670       }
671       i = nexti;
672     }
673 
674     /* save nonzero entries in k-th row of U ... */
675 
676     /* invert diagonal block */
677     diag = ba+k*4;
678     ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr);
679     ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
680 
681     jmin = bi[k]; jmax = bi[k+1];
682     if (jmin < jmax) {
683       for (j=jmin; j<jmax; j++){
684          vj = bj[j];           /* block col. index of U */
685          u   = ba + j*4;
686          rtmp_ptr = rtmp + vj*4;
687          for (k1=0; k1<4; k1++){
688            *u++        = *rtmp_ptr;
689            *rtmp_ptr++ = 0.0;
690          }
691       }
692 
693       /* ... add k to row list for first nonzero entry in k-th row */
694       il[k] = jmin;
695       i     = bj[jmin];
696       jl[k] = jl[i]; jl[i] = k;
697     }
698   }
699 
700   ierr = PetscFree(rtmp);CHKERRQ(ierr);
701   ierr = PetscFree(il);CHKERRQ(ierr);
702   ierr = PetscFree(dk);CHKERRQ(ierr);
703   if (a->permute) {
704     ierr = PetscFree(aa);CHKERRQ(ierr);
705   }
706   ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr);
707   C->factor    = FACTOR_CHOLESKY;
708   C->assembled = PETSC_TRUE;
709   C->preallocated = PETSC_TRUE;
710   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
711   PetscFunctionReturn(0);
712 }
713 
714 /*
715       Version for when blocks are 2 by 2 Using natural ordering
716 */
717 #undef __FUNCT__
718 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering"
719 int MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B)
720 {
721   Mat                C = *B;
722   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
723   int                ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
724   int                *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
725   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
726   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
727 
728   PetscFunctionBegin;
729 
730   /* initialization */
731   /* il and jl record the first nonzero element in each row of the accessing
732      window U(0:k, k:mbs-1).
733      jl:    list of rows to be added to uneliminated rows
734             i>= k: jl(i) is the first row to be added to row i
735             i<  k: jl(i) is the row following row i in some list of rows
736             jl(i) = mbs indicates the end of a list
737      il(i): points to the first nonzero element in columns k,...,mbs-1 of
738             row i of U */
739   ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
740   ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr);
741   ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr);
742   jl   = il + mbs;
743   for (i=0; i<mbs; i++) {
744     jl[i] = mbs; il[0] = 0;
745   }
746   ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr);
747   uik  = dk + 4;
748 
749   ai = a->i; aj = a->j; aa = a->a;
750 
751   /* for each row k */
752   for (k = 0; k<mbs; k++){
753 
754     /*initialize k-th row with elements nonzero in row k of A */
755     jmin = ai[k]; jmax = ai[k+1];
756     ap = aa + jmin*4;
757     for (j = jmin; j < jmax; j++){
758       vj = aj[j];         /* block col. index */
759       rtmp_ptr = rtmp + vj*4;
760       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
761     }
762 
763     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
764     ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr);
765     i = jl[k]; /* first row to be added to k_th row  */
766 
767     while (i < k){
768       nexti = jl[i]; /* next row to be added to k_th row */
769 
770       /* compute multiplier */
771       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
772 
773       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
774       diag = ba + i*4;
775       u    = ba + ili*4;
776       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
777       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
778       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
779       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
780 
781       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
782       dk[0] += uik[0]*u[0] + uik[1]*u[1];
783       dk[1] += uik[2]*u[0] + uik[3]*u[1];
784       dk[2] += uik[0]*u[2] + uik[1]*u[3];
785       dk[3] += uik[2]*u[2] + uik[3]*u[3];
786 
787       /* update -U(i,k): ba[ili] = uik */
788       ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr);
789 
790       /* add multiple of row i to k-th row ... */
791       jmin = ili + 1; jmax = bi[i+1];
792       if (jmin < jmax){
793         for (j=jmin; j<jmax; j++) {
794           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
795           rtmp_ptr = rtmp + bj[j]*4;
796           u = ba + j*4;
797           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
798           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
799           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
800           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
801         }
802 
803         /* ... add i to row list for next nonzero entry */
804         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
805         j     = bj[jmin];
806         jl[i] = jl[j]; jl[j] = i; /* update jl */
807       }
808       i = nexti;
809     }
810 
811     /* save nonzero entries in k-th row of U ... */
812 
813     /* invert diagonal block */
814     diag = ba+k*4;
815     ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr);
816     ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
817 
818     jmin = bi[k]; jmax = bi[k+1];
819     if (jmin < jmax) {
820       for (j=jmin; j<jmax; j++){
821          vj = bj[j];           /* block col. index of U */
822          u   = ba + j*4;
823          rtmp_ptr = rtmp + vj*4;
824          for (k1=0; k1<4; k1++){
825            *u++        = *rtmp_ptr;
826            *rtmp_ptr++ = 0.0;
827          }
828       }
829 
830       /* ... add k to row list for first nonzero entry in k-th row */
831       il[k] = jmin;
832       i     = bj[jmin];
833       jl[k] = jl[i]; jl[i] = k;
834     }
835   }
836 
837   ierr = PetscFree(rtmp);CHKERRQ(ierr);
838   ierr = PetscFree(il);CHKERRQ(ierr);
839   ierr = PetscFree(dk);CHKERRQ(ierr);
840 
841   C->factor    = FACTOR_CHOLESKY;
842   C->assembled = PETSC_TRUE;
843   C->preallocated = PETSC_TRUE;
844   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
845   PetscFunctionReturn(0);
846 }
847 
848 /*
849     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
850     Version for blocks are 1 by 1.
851 */
852 #undef __FUNCT__
853 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1"
854 int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B)
855 {
856   Mat                C = *B;
857   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
858   IS                 ip = b->row;
859   int                *rip,ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j;
860   int                *ai,*aj,*r;
861   int                k,jmin,jmax,*jl,*il,vj,nexti,ili;
862   MatScalar          *rtmp;
863   MatScalar          *ba = b->a,*aa,ak;
864   MatScalar          dk,uikdi;
865 
866   PetscFunctionBegin;
867   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
868   if (!a->permute){
869     ai = a->i; aj = a->j; aa = a->a;
870   } else {
871     ai = a->inew; aj = a->jnew;
872     ierr = PetscMalloc(ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr);
873     ierr = PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr);
874     ierr = PetscMalloc(ai[mbs]*sizeof(int),&r);CHKERRQ(ierr);
875     ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr);
876 
877     jmin = ai[0]; jmax = ai[mbs];
878     for (j=jmin; j<jmax; j++){
879       while (r[j] != j){
880         k = r[j]; r[j] = r[k]; r[k] = k;
881         ak = aa[k]; aa[k] = aa[j]; aa[j] = ak;
882       }
883     }
884     ierr = PetscFree(r);CHKERRQ(ierr);
885   }
886 
887   /* initialization */
888   /* il and jl record the first nonzero element in each row of the accessing
889      window U(0:k, k:mbs-1).
890      jl:    list of rows to be added to uneliminated rows
891             i>= k: jl(i) is the first row to be added to row i
892             i<  k: jl(i) is the row following row i in some list of rows
893             jl(i) = mbs indicates the end of a list
894      il(i): points to the first nonzero element in columns k,...,mbs-1 of
895             row i of U */
896   ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
897   ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr);
898   jl   = il + mbs;
899   for (i=0; i<mbs; i++) {
900     rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
901   }
902 
903   /* for each row k */
904   for (k = 0; k<mbs; k++){
905 
906     /*initialize k-th row with elements nonzero in row perm(k) of A */
907     jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
908 
909     for (j = jmin; j < jmax; j++){
910       vj = rip[aj[j]];
911       rtmp[vj] = aa[j];
912     }
913 
914     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
915     dk = rtmp[k];
916     i = jl[k]; /* first row to be added to k_th row  */
917 
918     while (i < k){
919       nexti = jl[i]; /* next row to be added to k_th row */
920 
921       /* compute multiplier, update D(k) and U(i,k) */
922       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
923       uikdi = - ba[ili]*ba[i];
924       dk += uikdi*ba[ili];
925       ba[ili] = uikdi; /* -U(i,k) */
926 
927       /* add multiple of row i to k-th row ... */
928       jmin = ili + 1; jmax = bi[i+1];
929       if (jmin < jmax){
930         for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
931         /* ... add i to row list for next nonzero entry */
932         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
933         j     = bj[jmin];
934         jl[i] = jl[j]; jl[j] = i; /* update jl */
935       }
936       i = nexti;
937     }
938 
939     /* check for zero pivot and save diagoanl element */
940     if (dk == 0.0){
941       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
942       /*
943     } else if (PetscRealPart(dk) < 0.0){
944       SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk);
945       */
946     }
947 
948     /* save nonzero entries in k-th row of U ... */
949     ba[k] = 1.0/dk;
950     jmin = bi[k]; jmax = bi[k+1];
951     if (jmin < jmax) {
952       for (j=jmin; j<jmax; j++){
953          vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0;
954       }
955       /* ... add k to row list for first nonzero entry in k-th row */
956       il[k] = jmin;
957       i     = bj[jmin];
958       jl[k] = jl[i]; jl[i] = k;
959     }
960   }
961 
962   ierr = PetscFree(rtmp);CHKERRQ(ierr);
963   ierr = PetscFree(il);CHKERRQ(ierr);
964   if (a->permute){
965     ierr = PetscFree(aa);CHKERRQ(ierr);
966   }
967 
968   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
969   C->factor    = FACTOR_CHOLESKY;
970   C->assembled = PETSC_TRUE;
971   C->preallocated = PETSC_TRUE;
972   PetscLogFlops(b->mbs);
973   PetscFunctionReturn(0);
974 }
975 
976 /*
977   Version for when blocks are 1 by 1 Using natural ordering
978 */
979 #undef __FUNCT__
980 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering"
981 int MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B)
982 {
983   Mat                C = *B;
984   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
985   int                ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j;
986   int                *ai,*aj;
987   int                k,jmin,jmax,*jl,*il,vj,nexti,ili;
988   MatScalar          *rtmp,*ba = b->a,*aa,dk,uikdi;
989 
990   PetscFunctionBegin;
991 
992   /* initialization */
993   /* il and jl record the first nonzero element in each row of the accessing
994      window U(0:k, k:mbs-1).
995      jl:    list of rows to be added to uneliminated rows
996             i>= k: jl(i) is the first row to be added to row i
997             i<  k: jl(i) is the row following row i in some list of rows
998             jl(i) = mbs indicates the end of a list
999      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1000             row i of U */
1001   ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
1002   ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr);
1003   jl   = il + mbs;
1004   for (i=0; i<mbs; i++) {
1005     rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1006   }
1007 
1008   ai = a->i; aj = a->j; aa = a->a;
1009 
1010   /* for each row k */
1011   for (k = 0; k<mbs; k++){
1012 
1013     /*initialize k-th row with elements nonzero in row perm(k) of A */
1014     jmin = ai[k]; jmax = ai[k+1];
1015 
1016     for (j = jmin; j < jmax; j++){
1017       vj = aj[j];
1018       rtmp[vj] = aa[j];
1019     }
1020 
1021     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1022     dk = rtmp[k];
1023     i = jl[k]; /* first row to be added to k_th row  */
1024 
1025     while (i < k){
1026       nexti = jl[i]; /* next row to be added to k_th row */
1027 
1028       /* compute multiplier, update D(k) and U(i,k) */
1029       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1030       uikdi = - ba[ili]*ba[i];
1031       dk += uikdi*ba[ili];
1032       ba[ili] = uikdi; /* -U(i,k) */
1033 
1034       /* add multiple of row i to k-th row ... */
1035       jmin = ili + 1; jmax = bi[i+1];
1036       if (jmin < jmax){
1037         for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1038         /* ... add i to row list for next nonzero entry */
1039         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1040         j     = bj[jmin];
1041         jl[i] = jl[j]; jl[j] = i; /* update jl */
1042       }
1043       i = nexti;
1044     }
1045 
1046     /* check for zero pivot and save diagoanl element */
1047     if (dk == 0.0){
1048       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
1049       /*
1050     } else if (PetscRealPart(dk) < 0){
1051       SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk);
1052       */
1053     }
1054 
1055     /* save nonzero entries in k-th row of U ... */
1056     ba[k] = 1.0/dk;
1057     jmin = bi[k]; jmax = bi[k+1];
1058     if (jmin < jmax) {
1059       for (j=jmin; j<jmax; j++){
1060          vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0;
1061       }
1062       /* ... add k to row list for first nonzero entry in k-th row */
1063       il[k] = jmin;
1064       i     = bj[jmin];
1065       jl[k] = jl[i]; jl[i] = k;
1066     }
1067   }
1068 
1069   ierr = PetscFree(rtmp);CHKERRQ(ierr);
1070   ierr = PetscFree(il);CHKERRQ(ierr);
1071 
1072   C->factor    = FACTOR_CHOLESKY;
1073   C->assembled = PETSC_TRUE;
1074   C->preallocated = PETSC_TRUE;
1075   PetscLogFlops(b->mbs);
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 #undef __FUNCT__
1080 #define __FUNCT__ "MatCholeskyFactor_SeqSBAIJ"
1081 int MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,PetscReal f)
1082 {
1083   int ierr;
1084   Mat C;
1085 
1086   PetscFunctionBegin;
1087   ierr = MatCholeskyFactorSymbolic(A,perm,f,&C);CHKERRQ(ierr);
1088   ierr = MatCholeskyFactorNumeric(A,&C);CHKERRQ(ierr);
1089   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1090   PetscFunctionReturn(0);
1091 }
1092 
1093 
1094