xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact.c (revision 456192e25527e6dfd94efe19f31bdabe85ed0893)
1 /*$Id: sbaijfact.c,v 1.61 2001/08/06 21:15:47 bsmith Exp $*/
2 /* Using Modified Sparse Row (MSR) storage.
3 See page 85, "Iterative Methods ..." by Saad. */
4 
5 /*
6     Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
7 */
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/mat/impls/sbaij/seq/sbaij.h"
10 #include "src/vec/vecimpl.h"
11 #include "src/inline/ilu.h"
12 #include "include/petscis.h"
13 
14 /* Use Modified Sparse Row storage for u and ju, see Sasd pp.85 */
15 #undef __FUNCT__
16 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ"
17 int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,PetscReal f,Mat *B)
18 {
19   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b;
20   int          *rip,ierr,i,mbs = a->mbs,*ai,*aj;
21   int          *jutmp,bs = a->bs,bs2=a->bs2;
22   int          m,realloc = 0,prow;
23   int          *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
24   PetscTruth   perm_identity;
25 
26   PetscFunctionBegin;
27 
28   /* check whether perm is the identity mapping */
29   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
30   if (!perm_identity) a->permute = PETSC_TRUE;
31 
32   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
33 
34   if (perm_identity){ /* without permutation */
35     ai = a->i; aj = a->j;
36   } else {            /* non-trivial permutation */
37     ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr);
38     ai = a->inew; aj = a->jnew;
39   }
40 
41   /* initialization */
42   ierr  = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr);
43   umax  = (int)(f*ai[mbs] + 1); umax += mbs + 1;
44   ierr  = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr);
45   iu[0] = mbs+1;
46   juidx = mbs + 1; /* index for ju */
47   ierr  = PetscMalloc(2*mbs*sizeof(int),&jl);CHKERRQ(ierr); /* linked list for pivot row */
48   q     = jl + mbs;   /* linked list for col index */
49   for (i=0; i<mbs; i++){
50     jl[i] = mbs;
51     q[i] = 0;
52   }
53 
54   /* for each row k */
55   for (k=0; k<mbs; k++){
56     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
57     q[k] = mbs;
58     /* initialize nonzero structure of k-th row to row rip[k] of A */
59     jmin = ai[rip[k]];
60     jmax = ai[rip[k]+1];
61     for (j=jmin; j<jmax; j++){
62       vj = rip[aj[j]]; /* col. value */
63       if(vj > k){
64         qm = k;
65         do {
66           m  = qm; qm = q[m];
67         } while(qm < vj);
68         if (qm == vj) {
69           SETERRQ(1," error: duplicate entry in A\n");
70         }
71         nzk++;
72         q[m]  = vj;
73         q[vj] = qm;
74       } /* if(vj > k) */
75     } /* for (j=jmin; j<jmax; j++) */
76 
77     /* modify nonzero structure of k-th row by computing fill-in
78        for each row i to be merged in */
79     prow = k;
80     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
81 
82     while (prow < k){
83       /* merge row prow into k-th row */
84       jmin = iu[prow] + 1; jmax = iu[prow+1];
85       qm = k;
86       for (j=jmin; j<jmax; j++){
87         vj = ju[j];
88         do {
89           m = qm; qm = q[m];
90         } while (qm < vj);
91         if (qm != vj){
92          nzk++; q[m] = vj; q[vj] = qm; qm = vj;
93         }
94       }
95       prow = jl[prow]; /* next pivot row */
96     }
97 
98     /* add k to row list for first nonzero element in k-th row */
99     if (nzk > 0){
100       i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
101       jl[k] = jl[i]; jl[i] = k;
102     }
103     iu[k+1] = iu[k] + nzk;
104 
105     /* allocate more space to ju if needed */
106     if (iu[k+1] > umax) {
107       /* estimate how much additional space we will need */
108       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
109       /* just double the memory each time */
110       maxadd = umax;
111       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
112       umax += maxadd;
113 
114       /* allocate a longer ju */
115       ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr);
116       ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr);
117       ierr = PetscFree(ju);CHKERRQ(ierr);
118       ju   = jutmp;
119       realloc++; /* count how many times we realloc */
120     }
121 
122     /* save nonzero structure of k-th row in ju */
123     i=k;
124     while (nzk --) {
125       i           = q[i];
126       ju[juidx++] = i;
127     }
128   }
129 
130   if (ai[mbs] != 0) {
131     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
132     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
133     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
134     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af);
135     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n");
136   } else {
137      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
138   }
139 
140   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
141   /* ierr = PetscFree(q);CHKERRQ(ierr); */
142   ierr = PetscFree(jl);CHKERRQ(ierr);
143 
144   /* put together the new matrix */
145   ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr);
146   /* PetscLogObjectParent(*B,iperm); */
147   b = (Mat_SeqSBAIJ*)(*B)->data;
148   ierr = PetscFree(b->imax);CHKERRQ(ierr);
149   b->singlemalloc = PETSC_FALSE;
150   /* the next line frees the default space generated by the Create() */
151   ierr = PetscFree(b->a);CHKERRQ(ierr);
152   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
153   ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
154   b->j    = ju;
155   b->i    = iu;
156   b->diag = 0;
157   b->ilen = 0;
158   b->imax = 0;
159   b->row  = perm;
160   b->pivotinblocks = PETSC_FALSE; /* need to get from MatCholeskyInfo */
161   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
162   b->icol = perm;
163   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
164   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
165   /* In b structure:  Free imax, ilen, old a, old j.
166      Allocate idnew, solve_work, new a, new j */
167   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
168   b->s_maxnz = b->s_nz = iu[mbs];
169 
170   (*B)->factor                 = FACTOR_CHOLESKY;
171   (*B)->info.factor_mallocs    = realloc;
172   (*B)->info.fill_ratio_given  = f;
173   if (ai[mbs] != 0) {
174     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
175   } else {
176     (*B)->info.fill_ratio_needed = 0.0;
177   }
178 
179   if (perm_identity){
180     switch (bs) {
181       case 1:
182         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
183         (*B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
184         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
185         break;
186       case 2:
187         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
188         (*B)->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
189         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
190         break;
191       case 3:
192         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
193         (*B)->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
194         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n");
195         break;
196       case 4:
197         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
198         (*B)->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
199         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
200         break;
201       case 5:
202         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
203         (*B)->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
204         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
205         break;
206       case 6:
207         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
208         (*B)->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
209         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
210         break;
211       case 7:
212         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
213         (*B)->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
214         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
215       break;
216       default:
217         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
218         (*B)->ops->solve           = MatSolve_SeqSBAIJ_N_NaturalOrdering;
219         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n");
220       break;
221     }
222   }
223 
224   PetscFunctionReturn(0);
225 }
226 
227 
228 #undef __FUNCT__
229 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N"
230 int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B)
231 {
232   Mat                C = *B;
233   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
234   IS                 perm = b->row;
235   int                *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
236   int                *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
237   int                bs=a->bs,bs2 = a->bs2;
238   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
239   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
240   MatScalar          *work;
241   int                *pivots;
242 
243   PetscFunctionBegin;
244 
245   /* initialization */
246   ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
247   ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr);
248   ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr);
249   jl   = il + mbs;
250   for (i=0; i<mbs; i++) {
251     jl[i] = mbs; il[0] = 0;
252   }
253   ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr);
254   uik  = dk + bs2;
255   work = uik + bs2;
256   ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr);
257 
258   ierr  = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr);
259 
260   /* check permutation */
261   if (!a->permute){
262     ai = a->i; aj = a->j; aa = a->a;
263   } else {
264     ai   = a->inew; aj = a->jnew;
265     ierr = PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr);
266     ierr = PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr);
267     ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr);
268     ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr);
269 
270     for (i=0; i<mbs; i++){
271       jmin = ai[i]; jmax = ai[i+1];
272       for (j=jmin; j<jmax; j++){
273         while (a2anew[j] != j){
274           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
275           for (k1=0; k1<bs2; k1++){
276             dk[k1]       = aa[k*bs2+k1];
277             aa[k*bs2+k1] = aa[j*bs2+k1];
278             aa[j*bs2+k1] = dk[k1];
279           }
280         }
281         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
282         if (i > aj[j]){
283           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
284           ap = aa + j*bs2;                     /* ptr to the beginning of j-th block of aa */
285           for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
286           for (k=0; k<bs; k++){               /* j-th block of aa <- dk^T */
287             for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1];
288           }
289         }
290       }
291     }
292     ierr = PetscFree(a2anew);CHKERRQ(ierr);
293   }
294 
295   /* for each row k */
296   for (k = 0; k<mbs; k++){
297 
298     /*initialize k-th row with elements nonzero in row perm(k) of A */
299     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
300 
301     ap = aa + jmin*bs2;
302     for (j = jmin; j < jmax; j++){
303       vj = perm_ptr[aj[j]];         /* block col. index */
304       rtmp_ptr = rtmp + vj*bs2;
305       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
306     }
307 
308     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
309     ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
310     i = jl[k]; /* first row to be added to k_th row  */
311 
312     while (i < k){
313       nexti = jl[i]; /* next row to be added to k_th row */
314 
315       /* compute multiplier */
316       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
317 
318       /* uik = -inv(Di)*U_bar(i,k) */
319       diag = ba + i*bs2;
320       u    = ba + ili*bs2;
321       ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr);
322       Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
323 
324       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
325       Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
326 
327       /* update -U(i,k) */
328       ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr);
329 
330       /* add multiple of row i to k-th row ... */
331       jmin = ili + 1; jmax = bi[i+1];
332       if (jmin < jmax){
333         for (j=jmin; j<jmax; j++) {
334           /* rtmp += -U(i,k)^T * U_bar(i,j) */
335           rtmp_ptr = rtmp + bj[j]*bs2;
336           u = ba + j*bs2;
337           Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
338         }
339 
340         /* ... add i to row list for next nonzero entry */
341         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
342         j     = bj[jmin];
343         jl[i] = jl[j]; jl[j] = i; /* update jl */
344       }
345       i = nexti;
346     }
347 
348     /* save nonzero entries in k-th row of U ... */
349 
350     /* invert diagonal block */
351     diag = ba+k*bs2;
352     ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr);
353     Kernel_A_gets_inverse_A(bs,diag,pivots,work);
354 
355     jmin = bi[k]; jmax = bi[k+1];
356     if (jmin < jmax) {
357       for (j=jmin; j<jmax; j++){
358          vj = bj[j];           /* block col. index of U */
359          u   = ba + j*bs2;
360          rtmp_ptr = rtmp + vj*bs2;
361          for (k1=0; k1<bs2; k1++){
362            *u++        = *rtmp_ptr;
363            *rtmp_ptr++ = 0.0;
364          }
365       }
366 
367       /* ... add k to row list for first nonzero entry in k-th row */
368       il[k] = jmin;
369       i     = bj[jmin];
370       jl[k] = jl[i]; jl[i] = k;
371     }
372   }
373 
374   ierr = PetscFree(rtmp);CHKERRQ(ierr);
375   ierr = PetscFree(il);CHKERRQ(ierr);
376   ierr = PetscFree(dk);CHKERRQ(ierr);
377   ierr = PetscFree(pivots);CHKERRQ(ierr);
378   if (a->permute){
379     ierr = PetscFree(aa);CHKERRQ(ierr);
380   }
381 
382   ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr);
383   C->factor    = FACTOR_CHOLESKY;
384   C->assembled = PETSC_TRUE;
385   C->preallocated = PETSC_TRUE;
386   PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
387   PetscFunctionReturn(0);
388 }
389 
390 #undef __FUNCT__
391 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering"
392 int MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B)
393 {
394   Mat                C = *B;
395   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
396   int                ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
397   int                *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
398   int                bs=a->bs,bs2 = a->bs2;
399   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
400   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
401   MatScalar          *work;
402   int                *pivots;
403 
404   PetscFunctionBegin;
405 
406   /* initialization */
407 
408   ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
409   ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr);
410   ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr);
411   jl   = il + mbs;
412   for (i=0; i<mbs; i++) {
413     jl[i] = mbs; il[0] = 0;
414   }
415   ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr);
416   uik  = dk + bs2;
417   work = uik + bs2;
418   ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr);
419 
420   ai = a->i; aj = a->j; aa = a->a;
421 
422   /* for each row k */
423   for (k = 0; k<mbs; k++){
424 
425     /*initialize k-th row with elements nonzero in row k of A */
426     jmin = ai[k]; jmax = ai[k+1];
427     ap = aa + jmin*bs2;
428     for (j = jmin; j < jmax; j++){
429       vj = aj[j];         /* block col. index */
430       rtmp_ptr = rtmp + vj*bs2;
431       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
432     }
433 
434     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
435     ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
436     i = jl[k]; /* first row to be added to k_th row  */
437 
438     while (i < k){
439       nexti = jl[i]; /* next row to be added to k_th row */
440 
441       /* compute multiplier */
442       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
443 
444       /* uik = -inv(Di)*U_bar(i,k) */
445       diag = ba + i*bs2;
446       u    = ba + ili*bs2;
447       ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr);
448       Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
449 
450       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
451       Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
452 
453       /* update -U(i,k) */
454       ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr);
455 
456       /* add multiple of row i to k-th row ... */
457       jmin = ili + 1; jmax = bi[i+1];
458       if (jmin < jmax){
459         for (j=jmin; j<jmax; j++) {
460           /* rtmp += -U(i,k)^T * U_bar(i,j) */
461           rtmp_ptr = rtmp + bj[j]*bs2;
462           u = ba + j*bs2;
463           Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
464         }
465 
466         /* ... add i to row list for next nonzero entry */
467         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
468         j     = bj[jmin];
469         jl[i] = jl[j]; jl[j] = i; /* update jl */
470       }
471       i = nexti;
472     }
473 
474     /* save nonzero entries in k-th row of U ... */
475 
476     /* invert diagonal block */
477     diag = ba+k*bs2;
478     ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr);
479     Kernel_A_gets_inverse_A(bs,diag,pivots,work);
480 
481     jmin = bi[k]; jmax = bi[k+1];
482     if (jmin < jmax) {
483       for (j=jmin; j<jmax; j++){
484          vj = bj[j];           /* block col. index of U */
485          u   = ba + j*bs2;
486          rtmp_ptr = rtmp + vj*bs2;
487          for (k1=0; k1<bs2; k1++){
488            *u++        = *rtmp_ptr;
489            *rtmp_ptr++ = 0.0;
490          }
491       }
492 
493       /* ... add k to row list for first nonzero entry in k-th row */
494       il[k] = jmin;
495       i     = bj[jmin];
496       jl[k] = jl[i]; jl[i] = k;
497     }
498   }
499 
500   ierr = PetscFree(rtmp);CHKERRQ(ierr);
501   ierr = PetscFree(il);CHKERRQ(ierr);
502   ierr = PetscFree(dk);CHKERRQ(ierr);
503   ierr = PetscFree(pivots);CHKERRQ(ierr);
504 
505   C->factor    = FACTOR_CHOLESKY;
506   C->assembled = PETSC_TRUE;
507   C->preallocated = PETSC_TRUE;
508   PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
509   PetscFunctionReturn(0);
510 }
511 
512 /*
513     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
514     Version for blocks 2 by 2.
515 */
516 #undef __FUNCT__
517 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2"
518 int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B)
519 {
520   Mat                C = *B;
521   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
522   IS                 perm = b->row;
523   int                *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
524   int                *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
525   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
526   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
527 
528   PetscFunctionBegin;
529 
530   /* initialization */
531   /* il and jl record the first nonzero element in each row of the accessing
532      window U(0:k, k:mbs-1).
533      jl:    list of rows to be added to uneliminated rows
534             i>= k: jl(i) is the first row to be added to row i
535             i<  k: jl(i) is the row following row i in some list of rows
536             jl(i) = mbs indicates the end of a list
537      il(i): points to the first nonzero element in columns k,...,mbs-1 of
538             row i of U */
539   ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
540   ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr);
541   ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr);
542   jl   = il + mbs;
543   for (i=0; i<mbs; i++) {
544     jl[i] = mbs; il[0] = 0;
545   }
546   ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr);
547   uik  = dk + 4;
548   ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr);
549 
550   /* check permutation */
551   if (!a->permute){
552     ai = a->i; aj = a->j; aa = a->a;
553   } else {
554     ai   = a->inew; aj = a->jnew;
555     ierr = PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr);
556     ierr = PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr);
557     ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr);
558     ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr);
559 
560     for (i=0; i<mbs; i++){
561       jmin = ai[i]; jmax = ai[i+1];
562       for (j=jmin; j<jmax; j++){
563         while (a2anew[j] != j){
564           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
565           for (k1=0; k1<4; k1++){
566             dk[k1]       = aa[k*4+k1];
567             aa[k*4+k1] = aa[j*4+k1];
568             aa[j*4+k1] = dk[k1];
569           }
570         }
571         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
572         if (i > aj[j]){
573           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
574           ap = aa + j*4;     /* ptr to the beginning of the block */
575           dk[1] = ap[1];     /* swap ap[1] and ap[2] */
576           ap[1] = ap[2];
577           ap[2] = dk[1];
578         }
579       }
580     }
581     ierr = PetscFree(a2anew);CHKERRQ(ierr);
582   }
583 
584   /* for each row k */
585   for (k = 0; k<mbs; k++){
586 
587     /*initialize k-th row with elements nonzero in row perm(k) of A */
588     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
589     ap = aa + jmin*4;
590     for (j = jmin; j < jmax; j++){
591       vj = perm_ptr[aj[j]];         /* block col. index */
592       rtmp_ptr = rtmp + vj*4;
593       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
594     }
595 
596     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
597     ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr);
598     i = jl[k]; /* first row to be added to k_th row  */
599 
600     while (i < k){
601       nexti = jl[i]; /* next row to be added to k_th row */
602 
603       /* compute multiplier */
604       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
605 
606       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
607       diag = ba + i*4;
608       u    = ba + ili*4;
609       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
610       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
611       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
612       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
613 
614       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
615       dk[0] += uik[0]*u[0] + uik[1]*u[1];
616       dk[1] += uik[2]*u[0] + uik[3]*u[1];
617       dk[2] += uik[0]*u[2] + uik[1]*u[3];
618       dk[3] += uik[2]*u[2] + uik[3]*u[3];
619 
620       /* update -U(i,k): ba[ili] = uik */
621       ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr);
622 
623       /* add multiple of row i to k-th row ... */
624       jmin = ili + 1; jmax = bi[i+1];
625       if (jmin < jmax){
626         for (j=jmin; j<jmax; j++) {
627           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
628           rtmp_ptr = rtmp + bj[j]*4;
629           u = ba + j*4;
630           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
631           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
632           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
633           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
634         }
635 
636         /* ... add i to row list for next nonzero entry */
637         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
638         j     = bj[jmin];
639         jl[i] = jl[j]; jl[j] = i; /* update jl */
640       }
641       i = nexti;
642     }
643 
644     /* save nonzero entries in k-th row of U ... */
645 
646     /* invert diagonal block */
647     diag = ba+k*4;
648     ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr);
649     ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
650 
651     jmin = bi[k]; jmax = bi[k+1];
652     if (jmin < jmax) {
653       for (j=jmin; j<jmax; j++){
654          vj = bj[j];           /* block col. index of U */
655          u   = ba + j*4;
656          rtmp_ptr = rtmp + vj*4;
657          for (k1=0; k1<4; k1++){
658            *u++        = *rtmp_ptr;
659            *rtmp_ptr++ = 0.0;
660          }
661       }
662 
663       /* ... add k to row list for first nonzero entry in k-th row */
664       il[k] = jmin;
665       i     = bj[jmin];
666       jl[k] = jl[i]; jl[i] = k;
667     }
668   }
669 
670   ierr = PetscFree(rtmp);CHKERRQ(ierr);
671   ierr = PetscFree(il);CHKERRQ(ierr);
672   ierr = PetscFree(dk);CHKERRQ(ierr);
673   if (a->permute) {
674     ierr = PetscFree(aa);CHKERRQ(ierr);
675   }
676   ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr);
677   C->factor    = FACTOR_CHOLESKY;
678   C->assembled = PETSC_TRUE;
679   C->preallocated = PETSC_TRUE;
680   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
681   PetscFunctionReturn(0);
682 }
683 
684 /*
685       Version for when blocks are 2 by 2 Using natural ordering
686 */
687 #undef __FUNCT__
688 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering"
689 int MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B)
690 {
691   Mat                C = *B;
692   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
693   int                ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
694   int                *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
695   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
696   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
697 
698   PetscFunctionBegin;
699 
700   /* initialization */
701   /* il and jl record the first nonzero element in each row of the accessing
702      window U(0:k, k:mbs-1).
703      jl:    list of rows to be added to uneliminated rows
704             i>= k: jl(i) is the first row to be added to row i
705             i<  k: jl(i) is the row following row i in some list of rows
706             jl(i) = mbs indicates the end of a list
707      il(i): points to the first nonzero element in columns k,...,mbs-1 of
708             row i of U */
709   ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
710   ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr);
711   ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr);
712   jl   = il + mbs;
713   for (i=0; i<mbs; i++) {
714     jl[i] = mbs; il[0] = 0;
715   }
716   ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr);
717   uik  = dk + 4;
718 
719   ai = a->i; aj = a->j; aa = a->a;
720 
721   /* for each row k */
722   for (k = 0; k<mbs; k++){
723 
724     /*initialize k-th row with elements nonzero in row k of A */
725     jmin = ai[k]; jmax = ai[k+1];
726     ap = aa + jmin*4;
727     for (j = jmin; j < jmax; j++){
728       vj = aj[j];         /* block col. index */
729       rtmp_ptr = rtmp + vj*4;
730       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
731     }
732 
733     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
734     ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr);
735     i = jl[k]; /* first row to be added to k_th row  */
736 
737     while (i < k){
738       nexti = jl[i]; /* next row to be added to k_th row */
739 
740       /* compute multiplier */
741       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
742 
743       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
744       diag = ba + i*4;
745       u    = ba + ili*4;
746       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
747       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
748       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
749       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
750 
751       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
752       dk[0] += uik[0]*u[0] + uik[1]*u[1];
753       dk[1] += uik[2]*u[0] + uik[3]*u[1];
754       dk[2] += uik[0]*u[2] + uik[1]*u[3];
755       dk[3] += uik[2]*u[2] + uik[3]*u[3];
756 
757       /* update -U(i,k): ba[ili] = uik */
758       ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr);
759 
760       /* add multiple of row i to k-th row ... */
761       jmin = ili + 1; jmax = bi[i+1];
762       if (jmin < jmax){
763         for (j=jmin; j<jmax; j++) {
764           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
765           rtmp_ptr = rtmp + bj[j]*4;
766           u = ba + j*4;
767           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
768           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
769           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
770           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
771         }
772 
773         /* ... add i to row list for next nonzero entry */
774         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
775         j     = bj[jmin];
776         jl[i] = jl[j]; jl[j] = i; /* update jl */
777       }
778       i = nexti;
779     }
780 
781     /* save nonzero entries in k-th row of U ... */
782 
783     /* invert diagonal block */
784     diag = ba+k*4;
785     ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr);
786     ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
787 
788     jmin = bi[k]; jmax = bi[k+1];
789     if (jmin < jmax) {
790       for (j=jmin; j<jmax; j++){
791          vj = bj[j];           /* block col. index of U */
792          u   = ba + j*4;
793          rtmp_ptr = rtmp + vj*4;
794          for (k1=0; k1<4; k1++){
795            *u++        = *rtmp_ptr;
796            *rtmp_ptr++ = 0.0;
797          }
798       }
799 
800       /* ... add k to row list for first nonzero entry in k-th row */
801       il[k] = jmin;
802       i     = bj[jmin];
803       jl[k] = jl[i]; jl[i] = k;
804     }
805   }
806 
807   ierr = PetscFree(rtmp);CHKERRQ(ierr);
808   ierr = PetscFree(il);CHKERRQ(ierr);
809   ierr = PetscFree(dk);CHKERRQ(ierr);
810 
811   C->factor    = FACTOR_CHOLESKY;
812   C->assembled = PETSC_TRUE;
813   C->preallocated = PETSC_TRUE;
814   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
815   PetscFunctionReturn(0);
816 }
817 
818 /*
819     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
820     Version for blocks are 1 by 1.
821 */
822 #undef __FUNCT__
823 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1"
824 int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B)
825 {
826   Mat                C = *B;
827   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
828   IS                 ip = b->row;
829   int                *rip,ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j;
830   int                *ai,*aj,*r;
831   int                k,jmin,jmax,*jl,*il,vj,nexti,ili;
832   MatScalar          *rtmp;
833   MatScalar          *ba = b->a,*aa,ak;
834   MatScalar          dk,uikdi;
835 
836   PetscFunctionBegin;
837   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
838   if (!a->permute){
839     ai = a->i; aj = a->j; aa = a->a;
840   } else {
841     ai = a->inew; aj = a->jnew;
842     ierr = PetscMalloc(ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr);
843     ierr = PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr);
844     ierr = PetscMalloc(ai[mbs]*sizeof(int),&r);CHKERRQ(ierr);
845     ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr);
846 
847     jmin = ai[0]; jmax = ai[mbs];
848     for (j=jmin; j<jmax; j++){
849       while (r[j] != j){
850         k = r[j]; r[j] = r[k]; r[k] = k;
851         ak = aa[k]; aa[k] = aa[j]; aa[j] = ak;
852       }
853     }
854     ierr = PetscFree(r);CHKERRQ(ierr);
855   }
856 
857   /* initialization */
858   /* il and jl record the first nonzero element in each row of the accessing
859      window U(0:k, k:mbs-1).
860      jl:    list of rows to be added to uneliminated rows
861             i>= k: jl(i) is the first row to be added to row i
862             i<  k: jl(i) is the row following row i in some list of rows
863             jl(i) = mbs indicates the end of a list
864      il(i): points to the first nonzero element in columns k,...,mbs-1 of
865             row i of U */
866   ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
867   ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr);
868   jl   = il + mbs;
869   for (i=0; i<mbs; i++) {
870     rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
871   }
872 
873   /* for each row k */
874   for (k = 0; k<mbs; k++){
875 
876     /*initialize k-th row with elements nonzero in row perm(k) of A */
877     jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
878 
879     for (j = jmin; j < jmax; j++){
880       vj = rip[aj[j]];
881       rtmp[vj] = aa[j];
882     }
883 
884     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
885     dk = rtmp[k];
886     i = jl[k]; /* first row to be added to k_th row  */
887 
888     while (i < k){
889       nexti = jl[i]; /* next row to be added to k_th row */
890 
891       /* compute multiplier, update D(k) and U(i,k) */
892       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
893       uikdi = - ba[ili]*ba[i];
894       dk += uikdi*ba[ili];
895       ba[ili] = uikdi; /* -U(i,k) */
896 
897       /* add multiple of row i to k-th row ... */
898       jmin = ili + 1; jmax = bi[i+1];
899       if (jmin < jmax){
900         for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
901         /* ... add i to row list for next nonzero entry */
902         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
903         j     = bj[jmin];
904         jl[i] = jl[j]; jl[j] = i; /* update jl */
905       }
906       i = nexti;
907     }
908 
909     /* check for zero pivot and save diagoanl element */
910     if (dk == 0.0){
911       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
912       /*
913     } else if (PetscRealPart(dk) < 0.0){
914       SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk);
915       */
916     }
917 
918     /* save nonzero entries in k-th row of U ... */
919     ba[k] = 1.0/dk;
920     jmin = bi[k]; jmax = bi[k+1];
921     if (jmin < jmax) {
922       for (j=jmin; j<jmax; j++){
923          vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0;
924       }
925       /* ... add k to row list for first nonzero entry in k-th row */
926       il[k] = jmin;
927       i     = bj[jmin];
928       jl[k] = jl[i]; jl[i] = k;
929     }
930   }
931 
932   ierr = PetscFree(rtmp);CHKERRQ(ierr);
933   ierr = PetscFree(il);CHKERRQ(ierr);
934   if (a->permute){
935     ierr = PetscFree(aa);CHKERRQ(ierr);
936   }
937 
938   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
939   C->factor    = FACTOR_CHOLESKY;
940   C->assembled = PETSC_TRUE;
941   C->preallocated = PETSC_TRUE;
942   PetscLogFlops(b->mbs);
943   PetscFunctionReturn(0);
944 }
945 
946 /*
947   Version for when blocks are 1 by 1 Using natural ordering
948 */
949 #undef __FUNCT__
950 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering"
951 int MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B)
952 {
953   Mat                C = *B;
954   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
955   int                ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j;
956   int                *ai,*aj;
957   int                k,jmin,jmax,*jl,*il,vj,nexti,ili;
958   MatScalar          *rtmp,*ba = b->a,*aa,dk,uikdi;
959 
960   PetscFunctionBegin;
961 
962   /* initialization */
963   /* il and jl record the first nonzero element in each row of the accessing
964      window U(0:k, k:mbs-1).
965      jl:    list of rows to be added to uneliminated rows
966             i>= k: jl(i) is the first row to be added to row i
967             i<  k: jl(i) is the row following row i in some list of rows
968             jl(i) = mbs indicates the end of a list
969      il(i): points to the first nonzero element in columns k,...,mbs-1 of
970             row i of U */
971   ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
972   ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr);
973   jl   = il + mbs;
974   for (i=0; i<mbs; i++) {
975     rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
976   }
977 
978   ai = a->i; aj = a->j; aa = a->a;
979 
980   /* for each row k */
981   for (k = 0; k<mbs; k++){
982 
983     /*initialize k-th row with elements nonzero in row perm(k) of A */
984     jmin = ai[k]; jmax = ai[k+1];
985 
986     for (j = jmin; j < jmax; j++){
987       vj = aj[j];
988       rtmp[vj] = aa[j];
989     }
990 
991     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
992     dk = rtmp[k];
993     i = jl[k]; /* first row to be added to k_th row  */
994 
995     while (i < k){
996       nexti = jl[i]; /* next row to be added to k_th row */
997 
998       /* compute multiplier, update D(k) and U(i,k) */
999       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1000       uikdi = - ba[ili]*ba[i];
1001       dk += uikdi*ba[ili];
1002       ba[ili] = uikdi; /* -U(i,k) */
1003 
1004       /* add multiple of row i to k-th row ... */
1005       jmin = ili + 1; jmax = bi[i+1];
1006       if (jmin < jmax){
1007         for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1008         /* ... add i to row list for next nonzero entry */
1009         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1010         j     = bj[jmin];
1011         jl[i] = jl[j]; jl[j] = i; /* update jl */
1012       }
1013       i = nexti;
1014     }
1015 
1016     /* check for zero pivot and save diagoanl element */
1017     if (dk == 0.0){
1018       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
1019       /*
1020     } else if (PetscRealPart(dk) < 0){
1021       SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk);
1022       */
1023     }
1024 
1025     /* save nonzero entries in k-th row of U ... */
1026     ba[k] = 1.0/dk;
1027     jmin = bi[k]; jmax = bi[k+1];
1028     if (jmin < jmax) {
1029       for (j=jmin; j<jmax; j++){
1030          vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0;
1031       }
1032       /* ... add k to row list for first nonzero entry in k-th row */
1033       il[k] = jmin;
1034       i     = bj[jmin];
1035       jl[k] = jl[i]; jl[i] = k;
1036     }
1037   }
1038 
1039   ierr = PetscFree(rtmp);CHKERRQ(ierr);
1040   ierr = PetscFree(il);CHKERRQ(ierr);
1041 
1042   C->factor    = FACTOR_CHOLESKY;
1043   C->assembled = PETSC_TRUE;
1044   C->preallocated = PETSC_TRUE;
1045   PetscLogFlops(b->mbs);
1046   PetscFunctionReturn(0);
1047 }
1048 
1049 #undef __FUNCT__
1050 #define __FUNCT__ "MatCholeskyFactor_SeqSBAIJ"
1051 int MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,PetscReal f)
1052 {
1053   int ierr;
1054   Mat C;
1055 
1056   PetscFunctionBegin;
1057   ierr = MatCholeskyFactorSymbolic(A,perm,f,&C);CHKERRQ(ierr);
1058   ierr = MatCholeskyFactorNumeric(A,&C);CHKERRQ(ierr);
1059   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1060   PetscFunctionReturn(0);
1061 }
1062 
1063 
1064