xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact.c (revision 5ed2d596fd9914de76abbaed56c65f4792ae57ae)
1 /*$Id: sbaijfact.c,v 1.61 2001/08/06 21:15:47 bsmith Exp $*/
2 /* Using Modified Sparse Row (MSR) storage.
3 See page 85, "Iterative Methods ..." by Saad. */
4 
5 /*
6     Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
7 */
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/mat/impls/sbaij/seq/sbaij.h"
10 #include "src/vec/vecimpl.h"
11 #include "src/inline/ilu.h"
12 #include "include/petscis.h"
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ"
16 int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,PetscReal f,Mat *B)
17 {
18   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b;
19   int          *rip,ierr,i,mbs = a->mbs,*ai,*aj;
20   int          *jutmp,bs = a->bs,bs2=a->bs2;
21   int          m,nzi,realloc = 0;
22   int          *jl,*q,jumin,jmin,jmax,juptr,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
23   PetscTruth   perm_identity;
24 
25   PetscFunctionBegin;
26 
27   /* check whether perm is the identity mapping */
28   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
29   if (!perm_identity) a->permute = PETSC_TRUE;
30 
31   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
32 
33   if (perm_identity){ /* without permutation */
34     ai = a->i; aj = a->j;
35   } else {            /* non-trivial permutation */
36     ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr);
37     ai = a->inew; aj = a->jnew;
38   }
39 
40   /* initialization */
41   /* Don't know how many column pointers are needed so estimate.
42      Use Modified Sparse Row storage for u and ju, see Sasd pp.85 */
43   ierr  = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr);
44   umax  = (int)(f*ai[mbs] + 1); umax += mbs + 1;
45   ierr  = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr);
46   iu[0] = mbs+1;
47   juptr = mbs;
48   ierr  = PetscMalloc(mbs*sizeof(int),&jl);CHKERRQ(ierr);
49   ierr  = PetscMalloc(mbs*sizeof(int),&q);CHKERRQ(ierr);
50   for (i=0; i<mbs; i++){
51     jl[i] = mbs; q[i] = 0;
52   }
53 
54   /* for each row k */
55   for (k=0; k<mbs; k++){
56     nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
57     q[k] = mbs;
58     /* initialize nonzero structure of k-th row to row rip[k] of A */
59     jmin = ai[rip[k]];
60     jmax = ai[rip[k]+1];
61     for (j=jmin; j<jmax; j++){
62       vj = rip[aj[j]]; /* col. value */
63       if(vj > k){
64         qm = k;
65         do {
66           m  = qm; qm = q[m];
67         } while(qm < vj);
68         if (qm == vj) {
69           printf(" error: duplicate entry in A\n"); break;
70         }
71         nzk++;
72         q[m] = vj;
73         q[vj] = qm;
74       } /* if(vj > k) */
75     } /* for (j=jmin; j<jmax; j++) */
76 
77     /* modify nonzero structure of k-th row by computing fill-in
78        for each row i to be merged in */
79     i = k;
80     i = jl[i]; /* next pivot row (== mbs for symbolic factorization) */
81     /* printf(" next pivot row i=%d\n",i); */
82     while (i < mbs){
83       /* merge row i into k-th row */
84       nzi = iu[i+1] - (iu[i]+1);
85       jmin = iu[i] + 1; jmax = iu[i] + nzi;
86       qm = k;
87       for (j=jmin; j<jmax+1; j++){
88         vj = ju[j];
89         do {
90           m = qm; qm = q[m];
91         } while (qm < vj);
92         if (qm != vj){
93          nzk++; q[m] = vj; q[vj] = qm; qm = vj;
94         }
95       }
96       i = jl[i]; /* next pivot row */
97     }
98 
99     /* add k to row list for first nonzero element in k-th row */
100     if (nzk > 0){
101       i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
102       jl[k] = jl[i]; jl[i] = k;
103     }
104     iu[k+1] = iu[k] + nzk;   /* printf(" iu[%d]=%d, umax=%d\n", k+1, iu[k+1],umax);*/
105 
106     /* allocate more space to ju if needed */
107     if (iu[k+1] > umax) { printf("allocate more space, iu[%d]=%d > umax=%d\n",k+1, iu[k+1],umax);
108       /* estimate how much additional space we will need */
109       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
110       /* just double the memory each time */
111       maxadd = umax;
112       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
113       umax += maxadd;
114 
115       /* allocate a longer ju */
116       ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr);
117       ierr  = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr);
118       ierr  = PetscFree(ju);CHKERRQ(ierr);
119       ju    = jutmp;
120       realloc++; /* count how many times we realloc */
121     }
122 
123     /* save nonzero structure of k-th row in ju */
124     i=k;
125     jumin = juptr + 1; juptr += nzk;
126     for (j=jumin; j<juptr+1; j++){
127       i=q[i];
128       ju[j]=i;
129     }
130   }
131 
132   if (ai[mbs] != 0) {
133     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
134     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
135     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_lu_fill %g or use \n",af);
136     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCLUSetFill(pc,%g);\n",af);
137     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n");
138   } else {
139      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
140   }
141 
142   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
143   ierr = PetscFree(q);CHKERRQ(ierr);
144   ierr = PetscFree(jl);CHKERRQ(ierr);
145 
146   /* put together the new matrix */
147   ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr);
148   /* PetscLogObjectParent(*B,iperm); */
149   b = (Mat_SeqSBAIJ*)(*B)->data;
150   ierr = PetscFree(b->imax);CHKERRQ(ierr);
151   b->singlemalloc = PETSC_FALSE;
152   /* the next line frees the default space generated by the Create() */
153   ierr = PetscFree(b->a);CHKERRQ(ierr);
154   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
155   ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
156   b->j    = ju;
157   b->i    = iu;
158   b->diag = 0;
159   b->ilen = 0;
160   b->imax = 0;
161   b->row  = perm;
162   b->pivotinblocks = PETSC_FALSE; /* need to get from MatCholeskyInfo */
163   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
164   b->icol = perm;
165   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
166   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
167   /* In b structure:  Free imax, ilen, old a, old j.
168      Allocate idnew, solve_work, new a, new j */
169   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
170   b->s_maxnz = b->s_nz = iu[mbs];
171 
172   (*B)->factor                 = FACTOR_CHOLESKY;
173   (*B)->info.factor_mallocs    = realloc;
174   (*B)->info.fill_ratio_given  = f;
175   if (ai[mbs] != 0) {
176     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
177   } else {
178     (*B)->info.fill_ratio_needed = 0.0;
179   }
180 
181   if (perm_identity){
182     switch (bs) {
183       case 1:
184         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
185         (*B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
186         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
187         break;
188       case 2:
189         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
190         (*B)->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
191         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
192         break;
193       case 3:
194         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
195         (*B)->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
196         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n");
197         break;
198       case 4:
199         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
200         (*B)->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
201         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
202         break;
203       case 5:
204         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
205         (*B)->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
206         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
207         break;
208       case 6:
209         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
210         (*B)->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
211         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
212         break;
213       case 7:
214         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
215         (*B)->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
216         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
217       break;
218       default:
219         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
220         (*B)->ops->solve           = MatSolve_SeqSBAIJ_N_NaturalOrdering;
221         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n");
222       break;
223     }
224   }
225 
226   PetscFunctionReturn(0);
227 }
228 
229 
230 #undef __FUNCT__
231 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N"
232 int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B)
233 {
234   Mat                C = *B;
235   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
236   IS                 perm = b->row;
237   int                *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
238   int                *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
239   int                bs=a->bs,bs2 = a->bs2;
240   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
241   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
242   MatScalar          *work;
243   int                *pivots;
244 
245   PetscFunctionBegin;
246 
247   /* initialization */
248   ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
249   ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr);
250   ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr);
251   jl   = il + mbs;
252   for (i=0; i<mbs; i++) {
253     jl[i] = mbs; il[0] = 0;
254   }
255   ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr);
256   uik  = dk + bs2;
257   work = uik + bs2;
258   ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr);
259 
260   ierr  = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr);
261 
262   /* check permutation */
263   if (!a->permute){
264     ai = a->i; aj = a->j; aa = a->a;
265   } else {
266     ai   = a->inew; aj = a->jnew;
267     ierr = PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr);
268     ierr = PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr);
269     ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr);
270     ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr);
271 
272     for (i=0; i<mbs; i++){
273       jmin = ai[i]; jmax = ai[i+1];
274       for (j=jmin; j<jmax; j++){
275         while (a2anew[j] != j){
276           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
277           for (k1=0; k1<bs2; k1++){
278             dk[k1]       = aa[k*bs2+k1];
279             aa[k*bs2+k1] = aa[j*bs2+k1];
280             aa[j*bs2+k1] = dk[k1];
281           }
282         }
283         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
284         if (i > aj[j]){
285           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
286           ap = aa + j*bs2;                     /* ptr to the beginning of j-th block of aa */
287           for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
288           for (k=0; k<bs; k++){               /* j-th block of aa <- dk^T */
289             for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1];
290           }
291         }
292       }
293     }
294     ierr = PetscFree(a2anew);CHKERRQ(ierr);
295   }
296 
297   /* for each row k */
298   for (k = 0; k<mbs; k++){
299 
300     /*initialize k-th row with elements nonzero in row perm(k) of A */
301     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
302     if (jmin < jmax) {
303       ap = aa + jmin*bs2;
304       for (j = jmin; j < jmax; j++){
305         vj = perm_ptr[aj[j]];         /* block col. index */
306         rtmp_ptr = rtmp + vj*bs2;
307         for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
308       }
309     }
310 
311     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
312     ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
313     i = jl[k]; /* first row to be added to k_th row  */
314 
315     while (i < mbs){
316       nexti = jl[i]; /* next row to be added to k_th row */
317 
318       /* compute multiplier */
319       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
320 
321       /* uik = -inv(Di)*U_bar(i,k) */
322       diag = ba + i*bs2;
323       u    = ba + ili*bs2;
324       ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr);
325       Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
326 
327       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
328       Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
329 
330       /* update -U(i,k) */
331       ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr);
332 
333       /* add multiple of row i to k-th row ... */
334       jmin = ili + 1; jmax = bi[i+1];
335       if (jmin < jmax){
336         for (j=jmin; j<jmax; j++) {
337           /* rtmp += -U(i,k)^T * U_bar(i,j) */
338           rtmp_ptr = rtmp + bj[j]*bs2;
339           u = ba + j*bs2;
340           Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
341         }
342 
343         /* ... add i to row list for next nonzero entry */
344         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
345         j     = bj[jmin];
346         jl[i] = jl[j]; jl[j] = i; /* update jl */
347       }
348       i = nexti;
349     }
350 
351     /* save nonzero entries in k-th row of U ... */
352 
353     /* invert diagonal block */
354     diag = ba+k*bs2;
355     ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr);
356     Kernel_A_gets_inverse_A(bs,diag,pivots,work);
357 
358     jmin = bi[k]; jmax = bi[k+1];
359     if (jmin < jmax) {
360       for (j=jmin; j<jmax; j++){
361          vj = bj[j];           /* block col. index of U */
362          u   = ba + j*bs2;
363          rtmp_ptr = rtmp + vj*bs2;
364          for (k1=0; k1<bs2; k1++){
365            *u++        = *rtmp_ptr;
366            *rtmp_ptr++ = 0.0;
367          }
368       }
369 
370       /* ... add k to row list for first nonzero entry in k-th row */
371       il[k] = jmin;
372       i     = bj[jmin];
373       jl[k] = jl[i]; jl[i] = k;
374     }
375   }
376 
377   ierr = PetscFree(rtmp);CHKERRQ(ierr);
378   ierr = PetscFree(il);CHKERRQ(ierr);
379   ierr = PetscFree(dk);CHKERRQ(ierr);
380   ierr = PetscFree(pivots);CHKERRQ(ierr);
381   if (a->permute){
382     ierr = PetscFree(aa);CHKERRQ(ierr);
383   }
384 
385   ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr);
386   C->factor    = FACTOR_CHOLESKY;
387   C->assembled = PETSC_TRUE;
388   C->preallocated = PETSC_TRUE;
389   PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
390   PetscFunctionReturn(0);
391 }
392 
393 #undef __FUNCT__
394 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering"
395 int MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B)
396 {
397   Mat                C = *B;
398   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
399   int                ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
400   int                *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
401   int                bs=a->bs,bs2 = a->bs2;
402   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
403   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
404   MatScalar          *work;
405   int                *pivots;
406 
407   PetscFunctionBegin;
408 
409   /* initialization */
410 
411   ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
412   ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr);
413   ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr);
414   jl   = il + mbs;
415   for (i=0; i<mbs; i++) {
416     jl[i] = mbs; il[0] = 0;
417   }
418   ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr);
419   uik  = dk + bs2;
420   work = uik + bs2;
421   ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr);
422 
423   ai = a->i; aj = a->j; aa = a->a;
424 
425   /* for each row k */
426   for (k = 0; k<mbs; k++){
427 
428     /*initialize k-th row with elements nonzero in row k of A */
429     jmin = ai[k]; jmax = ai[k+1];
430     if (jmin < jmax) {
431       ap = aa + jmin*bs2;
432       for (j = jmin; j < jmax; j++){
433         vj = aj[j];         /* block col. index */
434         rtmp_ptr = rtmp + vj*bs2;
435         for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
436       }
437     }
438 
439     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
440     ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
441     i = jl[k]; /* first row to be added to k_th row  */
442 
443     while (i < mbs){
444       nexti = jl[i]; /* next row to be added to k_th row */
445 
446       /* compute multiplier */
447       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
448 
449       /* uik = -inv(Di)*U_bar(i,k) */
450       diag = ba + i*bs2;
451       u    = ba + ili*bs2;
452       ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr);
453       Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
454 
455       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
456       Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
457 
458       /* update -U(i,k) */
459       ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr);
460 
461       /* add multiple of row i to k-th row ... */
462       jmin = ili + 1; jmax = bi[i+1];
463       if (jmin < jmax){
464         for (j=jmin; j<jmax; j++) {
465           /* rtmp += -U(i,k)^T * U_bar(i,j) */
466           rtmp_ptr = rtmp + bj[j]*bs2;
467           u = ba + j*bs2;
468           Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
469         }
470 
471         /* ... add i to row list for next nonzero entry */
472         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
473         j     = bj[jmin];
474         jl[i] = jl[j]; jl[j] = i; /* update jl */
475       }
476       i = nexti;
477     }
478 
479     /* save nonzero entries in k-th row of U ... */
480 
481     /* invert diagonal block */
482     diag = ba+k*bs2;
483     ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr);
484     Kernel_A_gets_inverse_A(bs,diag,pivots,work);
485 
486     jmin = bi[k]; jmax = bi[k+1];
487     if (jmin < jmax) {
488       for (j=jmin; j<jmax; j++){
489          vj = bj[j];           /* block col. index of U */
490          u   = ba + j*bs2;
491          rtmp_ptr = rtmp + vj*bs2;
492          for (k1=0; k1<bs2; k1++){
493            *u++        = *rtmp_ptr;
494            *rtmp_ptr++ = 0.0;
495          }
496       }
497 
498       /* ... add k to row list for first nonzero entry in k-th row */
499       il[k] = jmin;
500       i     = bj[jmin];
501       jl[k] = jl[i]; jl[i] = k;
502     }
503   }
504 
505   ierr = PetscFree(rtmp);CHKERRQ(ierr);
506   ierr = PetscFree(il);CHKERRQ(ierr);
507   ierr = PetscFree(dk);CHKERRQ(ierr);
508   ierr = PetscFree(pivots);CHKERRQ(ierr);
509 
510   C->factor    = FACTOR_CHOLESKY;
511   C->assembled = PETSC_TRUE;
512   C->preallocated = PETSC_TRUE;
513   PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
514   PetscFunctionReturn(0);
515 }
516 
517 /*
518     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
519     Version for blocks 2 by 2.
520 */
521 #undef __FUNCT__
522 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2"
523 int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B)
524 {
525   Mat                C = *B;
526   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
527   IS                 perm = b->row;
528   int                *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
529   int                *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
530   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
531   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
532 
533   PetscFunctionBegin;
534 
535   /* initialization */
536   /* il and jl record the first nonzero element in each row of the accessing
537      window U(0:k, k:mbs-1).
538      jl:    list of rows to be added to uneliminated rows
539             i>= k: jl(i) is the first row to be added to row i
540             i<  k: jl(i) is the row following row i in some list of rows
541             jl(i) = mbs indicates the end of a list
542      il(i): points to the first nonzero element in columns k,...,mbs-1 of
543             row i of U */
544   ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
545   ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr);
546   ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr);
547   jl   = il + mbs;
548   for (i=0; i<mbs; i++) {
549     jl[i] = mbs; il[0] = 0;
550   }
551   ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr);
552   uik  = dk + 4;
553   ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr);
554 
555   /* check permutation */
556   if (!a->permute){
557     ai = a->i; aj = a->j; aa = a->a;
558   } else {
559     ai   = a->inew; aj = a->jnew;
560     ierr = PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr);
561     ierr = PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr);
562     ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr);
563     ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr);
564 
565     for (i=0; i<mbs; i++){
566       jmin = ai[i]; jmax = ai[i+1];
567       for (j=jmin; j<jmax; j++){
568         while (a2anew[j] != j){
569           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
570           for (k1=0; k1<4; k1++){
571             dk[k1]       = aa[k*4+k1];
572             aa[k*4+k1] = aa[j*4+k1];
573             aa[j*4+k1] = dk[k1];
574           }
575         }
576         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
577         if (i > aj[j]){
578           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
579           ap = aa + j*4;     /* ptr to the beginning of the block */
580           dk[1] = ap[1];     /* swap ap[1] and ap[2] */
581           ap[1] = ap[2];
582           ap[2] = dk[1];
583         }
584       }
585     }
586     ierr = PetscFree(a2anew);CHKERRQ(ierr);
587   }
588 
589   /* for each row k */
590   for (k = 0; k<mbs; k++){
591 
592     /*initialize k-th row with elements nonzero in row perm(k) of A */
593     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
594     if (jmin < jmax) {
595       ap = aa + jmin*4;
596       for (j = jmin; j < jmax; j++){
597         vj = perm_ptr[aj[j]];         /* block col. index */
598         rtmp_ptr = rtmp + vj*4;
599         for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
600       }
601     }
602 
603     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
604     ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr);
605     i = jl[k]; /* first row to be added to k_th row  */
606 
607     while (i < mbs){
608       nexti = jl[i]; /* next row to be added to k_th row */
609 
610       /* compute multiplier */
611       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
612 
613       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
614       diag = ba + i*4;
615       u    = ba + ili*4;
616       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
617       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
618       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
619       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
620 
621       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
622       dk[0] += uik[0]*u[0] + uik[1]*u[1];
623       dk[1] += uik[2]*u[0] + uik[3]*u[1];
624       dk[2] += uik[0]*u[2] + uik[1]*u[3];
625       dk[3] += uik[2]*u[2] + uik[3]*u[3];
626 
627       /* update -U(i,k): ba[ili] = uik */
628       ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr);
629 
630       /* add multiple of row i to k-th row ... */
631       jmin = ili + 1; jmax = bi[i+1];
632       if (jmin < jmax){
633         for (j=jmin; j<jmax; j++) {
634           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
635           rtmp_ptr = rtmp + bj[j]*4;
636           u = ba + j*4;
637           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
638           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
639           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
640           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
641         }
642 
643         /* ... add i to row list for next nonzero entry */
644         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
645         j     = bj[jmin];
646         jl[i] = jl[j]; jl[j] = i; /* update jl */
647       }
648       i = nexti;
649     }
650 
651     /* save nonzero entries in k-th row of U ... */
652 
653     /* invert diagonal block */
654     diag = ba+k*4;
655     ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr);
656     ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
657 
658     jmin = bi[k]; jmax = bi[k+1];
659     if (jmin < jmax) {
660       for (j=jmin; j<jmax; j++){
661          vj = bj[j];           /* block col. index of U */
662          u   = ba + j*4;
663          rtmp_ptr = rtmp + vj*4;
664          for (k1=0; k1<4; k1++){
665            *u++        = *rtmp_ptr;
666            *rtmp_ptr++ = 0.0;
667          }
668       }
669 
670       /* ... add k to row list for first nonzero entry in k-th row */
671       il[k] = jmin;
672       i     = bj[jmin];
673       jl[k] = jl[i]; jl[i] = k;
674     }
675   }
676 
677   ierr = PetscFree(rtmp);CHKERRQ(ierr);
678   ierr = PetscFree(il);CHKERRQ(ierr);
679   ierr = PetscFree(dk);CHKERRQ(ierr);
680   if (a->permute) {
681     ierr = PetscFree(aa);CHKERRQ(ierr);
682   }
683   ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr);
684   C->factor    = FACTOR_CHOLESKY;
685   C->assembled = PETSC_TRUE;
686   C->preallocated = PETSC_TRUE;
687   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
688   PetscFunctionReturn(0);
689 }
690 
691 /*
692       Version for when blocks are 2 by 2 Using natural ordering
693 */
694 #undef __FUNCT__
695 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering"
696 int MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B)
697 {
698   Mat                C = *B;
699   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
700   int                ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
701   int                *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
702   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
703   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
704 
705   PetscFunctionBegin;
706 
707   /* initialization */
708   /* il and jl record the first nonzero element in each row of the accessing
709      window U(0:k, k:mbs-1).
710      jl:    list of rows to be added to uneliminated rows
711             i>= k: jl(i) is the first row to be added to row i
712             i<  k: jl(i) is the row following row i in some list of rows
713             jl(i) = mbs indicates the end of a list
714      il(i): points to the first nonzero element in columns k,...,mbs-1 of
715             row i of U */
716   ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
717   ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr);
718   ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr);
719   jl   = il + mbs;
720   for (i=0; i<mbs; i++) {
721     jl[i] = mbs; il[0] = 0;
722   }
723   ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr);
724   uik  = dk + 4;
725 
726   ai = a->i; aj = a->j; aa = a->a;
727 
728   /* for each row k */
729   for (k = 0; k<mbs; k++){
730 
731     /*initialize k-th row with elements nonzero in row k of A */
732     jmin = ai[k]; jmax = ai[k+1];
733     if (jmin < jmax) {
734       ap = aa + jmin*4;
735       for (j = jmin; j < jmax; j++){
736         vj = aj[j];         /* block col. index */
737         rtmp_ptr = rtmp + vj*4;
738         for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
739       }
740     }
741 
742     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
743     ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr);
744     i = jl[k]; /* first row to be added to k_th row  */
745 
746     while (i < mbs){
747       nexti = jl[i]; /* next row to be added to k_th row */
748 
749       /* compute multiplier */
750       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
751 
752       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
753       diag = ba + i*4;
754       u    = ba + ili*4;
755       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
756       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
757       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
758       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
759 
760       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
761       dk[0] += uik[0]*u[0] + uik[1]*u[1];
762       dk[1] += uik[2]*u[0] + uik[3]*u[1];
763       dk[2] += uik[0]*u[2] + uik[1]*u[3];
764       dk[3] += uik[2]*u[2] + uik[3]*u[3];
765 
766       /* update -U(i,k): ba[ili] = uik */
767       ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr);
768 
769       /* add multiple of row i to k-th row ... */
770       jmin = ili + 1; jmax = bi[i+1];
771       if (jmin < jmax){
772         for (j=jmin; j<jmax; j++) {
773           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
774           rtmp_ptr = rtmp + bj[j]*4;
775           u = ba + j*4;
776           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
777           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
778           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
779           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
780         }
781 
782         /* ... add i to row list for next nonzero entry */
783         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
784         j     = bj[jmin];
785         jl[i] = jl[j]; jl[j] = i; /* update jl */
786       }
787       i = nexti;
788     }
789 
790     /* save nonzero entries in k-th row of U ... */
791 
792     /* invert diagonal block */
793     diag = ba+k*4;
794     ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr);
795     ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
796 
797     jmin = bi[k]; jmax = bi[k+1];
798     if (jmin < jmax) {
799       for (j=jmin; j<jmax; j++){
800          vj = bj[j];           /* block col. index of U */
801          u   = ba + j*4;
802          rtmp_ptr = rtmp + vj*4;
803          for (k1=0; k1<4; k1++){
804            *u++        = *rtmp_ptr;
805            *rtmp_ptr++ = 0.0;
806          }
807       }
808 
809       /* ... add k to row list for first nonzero entry in k-th row */
810       il[k] = jmin;
811       i     = bj[jmin];
812       jl[k] = jl[i]; jl[i] = k;
813     }
814   }
815 
816   ierr = PetscFree(rtmp);CHKERRQ(ierr);
817   ierr = PetscFree(il);CHKERRQ(ierr);
818   ierr = PetscFree(dk);CHKERRQ(ierr);
819 
820   C->factor    = FACTOR_CHOLESKY;
821   C->assembled = PETSC_TRUE;
822   C->preallocated = PETSC_TRUE;
823   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
824   PetscFunctionReturn(0);
825 }
826 
827 /*
828     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
829     Version for blocks are 1 by 1.
830 */
831 #undef __FUNCT__
832 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1"
833 int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B)
834 {
835   Mat                C = *B;
836   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
837   IS                 ip = b->row;
838   int                *rip,ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j;
839   int                *ai,*aj,*r;
840   int                k,jmin,jmax,*jl,*il,vj,nexti,ili;
841   MatScalar          *rtmp;
842   MatScalar          *ba = b->a,*aa,ak;
843   MatScalar          dk,uikdi;
844 
845   PetscFunctionBegin;
846   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
847   if (!a->permute){
848     ai = a->i; aj = a->j; aa = a->a;
849   } else {
850     ai = a->inew; aj = a->jnew;
851     ierr = PetscMalloc(ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr);
852     ierr = PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr);
853     ierr = PetscMalloc(ai[mbs]*sizeof(int),&r);CHKERRQ(ierr);
854     ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr);
855 
856     jmin = ai[0]; jmax = ai[mbs];
857     for (j=jmin; j<jmax; j++){
858       while (r[j] != j){
859         k = r[j]; r[j] = r[k]; r[k] = k;
860         ak = aa[k]; aa[k] = aa[j]; aa[j] = ak;
861       }
862     }
863     ierr = PetscFree(r);CHKERRQ(ierr);
864   }
865 
866   /* initialization */
867   /* il and jl record the first nonzero element in each row of the accessing
868      window U(0:k, k:mbs-1).
869      jl:    list of rows to be added to uneliminated rows
870             i>= k: jl(i) is the first row to be added to row i
871             i<  k: jl(i) is the row following row i in some list of rows
872             jl(i) = mbs indicates the end of a list
873      il(i): points to the first nonzero element in columns k,...,mbs-1 of
874             row i of U */
875   ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
876   ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr);
877   jl   = il + mbs;
878   for (i=0; i<mbs; i++) {
879     rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
880   }
881 
882   /* for each row k */
883   for (k = 0; k<mbs; k++){
884 
885     /*initialize k-th row with elements nonzero in row perm(k) of A */
886     jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
887     if (jmin < jmax) {
888       for (j = jmin; j < jmax; j++){
889         vj = rip[aj[j]];
890         rtmp[vj] = aa[j];
891       }
892     }
893 
894     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
895     dk = rtmp[k];
896     i = jl[k]; /* first row to be added to k_th row  */
897 
898     while (i < mbs){
899       nexti = jl[i]; /* next row to be added to k_th row */
900 
901       /* compute multiplier, update D(k) and U(i,k) */
902       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
903       uikdi = - ba[ili]*ba[i];
904       dk += uikdi*ba[ili];
905       ba[ili] = uikdi; /* -U(i,k) */
906 
907       /* add multiple of row i to k-th row ... */
908       jmin = ili + 1; jmax = bi[i+1];
909       if (jmin < jmax){
910         for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
911         /* ... add i to row list for next nonzero entry */
912         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
913         j     = bj[jmin];
914         jl[i] = jl[j]; jl[j] = i; /* update jl */
915       }
916       i = nexti;
917     }
918 
919     /* check for zero pivot and save diagoanl element */
920     if (dk == 0.0){
921       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
922       /*
923     }else if (PetscRealPart(dk) < 0){
924       ierr = PetscPrintf(PETSC_COMM_SELF,"Negative pivot: d[%d] = %g\n",k,dk);
925       */
926     }
927 
928     /* save nonzero entries in k-th row of U ... */
929     ba[k] = 1.0/dk;
930     jmin = bi[k]; jmax = bi[k+1];
931     if (jmin < jmax) {
932       for (j=jmin; j<jmax; j++){
933          vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0;
934       }
935       /* ... add k to row list for first nonzero entry in k-th row */
936       il[k] = jmin;
937       i     = bj[jmin];
938       jl[k] = jl[i]; jl[i] = k;
939     }
940   }
941 
942   ierr = PetscFree(rtmp);CHKERRQ(ierr);
943   ierr = PetscFree(il);CHKERRQ(ierr);
944   if (a->permute){
945     ierr = PetscFree(aa);CHKERRQ(ierr);
946   }
947 
948   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
949   C->factor    = FACTOR_CHOLESKY;
950   C->assembled = PETSC_TRUE;
951   C->preallocated = PETSC_TRUE;
952   PetscLogFlops(b->mbs);
953   PetscFunctionReturn(0);
954 }
955 
956 /*
957   Version for when blocks are 1 by 1 Using natural ordering
958 */
959 #undef __FUNCT__
960 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering"
961 int MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B)
962 {
963   Mat                C = *B;
964   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
965   int                ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j;
966   int                *ai,*aj;
967   int                k,jmin,jmax,*jl,*il,vj,nexti,ili;
968   MatScalar          *rtmp,*ba = b->a,*aa,dk,uikdi;
969 
970   PetscFunctionBegin;
971 
972   /* initialization */
973   /* il and jl record the first nonzero element in each row of the accessing
974      window U(0:k, k:mbs-1).
975      jl:    list of rows to be added to uneliminated rows
976             i>= k: jl(i) is the first row to be added to row i
977             i<  k: jl(i) is the row following row i in some list of rows
978             jl(i) = mbs indicates the end of a list
979      il(i): points to the first nonzero element in columns k,...,mbs-1 of
980             row i of U */
981   ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
982   ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr);
983   jl   = il + mbs;
984   for (i=0; i<mbs; i++) {
985     rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
986   }
987 
988   ai = a->i; aj = a->j; aa = a->a;
989 
990   /* for each row k */
991   for (k = 0; k<mbs; k++){
992 
993     /*initialize k-th row with elements nonzero in row perm(k) of A */
994     jmin = ai[k]; jmax = ai[k+1];
995     if (jmin < jmax) {
996       for (j = jmin; j < jmax; j++){
997         vj = aj[j];
998         rtmp[vj] = aa[j];
999       }
1000     }
1001 
1002     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1003     dk = rtmp[k];
1004     i = jl[k]; /* first row to be added to k_th row  */
1005 
1006     while (i < mbs){
1007       nexti = jl[i]; /* next row to be added to k_th row */
1008 
1009       /* compute multiplier, update D(k) and U(i,k) */
1010       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1011       uikdi = - ba[ili]*ba[i];
1012       dk += uikdi*ba[ili];
1013       ba[ili] = uikdi; /* -U(i,k) */
1014 
1015       /* add multiple of row i to k-th row ... */
1016       jmin = ili + 1; jmax = bi[i+1];
1017       if (jmin < jmax){
1018         for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1019         /* ... add i to row list for next nonzero entry */
1020         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1021         j     = bj[jmin];
1022         jl[i] = jl[j]; jl[j] = i; /* update jl */
1023       }
1024       i = nexti;
1025     }
1026 
1027     /* check for zero pivot and save diagoanl element */
1028     if (dk == 0.0){
1029       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
1030       /*
1031     }else if (PetscRealPart(dk) < 0){
1032       ierr = PetscPrintf(PETSC_COMM_SELF,"Negative pivot: d[%d] = %g\n",k,dk);
1033       */
1034     }
1035 
1036     /* save nonzero entries in k-th row of U ... */
1037     ba[k] = 1.0/dk;
1038     jmin = bi[k]; jmax = bi[k+1];
1039     if (jmin < jmax) {
1040       for (j=jmin; j<jmax; j++){
1041          vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0;
1042       }
1043       /* ... add k to row list for first nonzero entry in k-th row */
1044       il[k] = jmin;
1045       i     = bj[jmin];
1046       jl[k] = jl[i]; jl[i] = k;
1047     }
1048   }
1049 
1050   ierr = PetscFree(rtmp);CHKERRQ(ierr);
1051   ierr = PetscFree(il);CHKERRQ(ierr);
1052 
1053   C->factor    = FACTOR_CHOLESKY;
1054   C->assembled = PETSC_TRUE;
1055   C->preallocated = PETSC_TRUE;
1056   PetscLogFlops(b->mbs);
1057   PetscFunctionReturn(0);
1058 }
1059 
1060 #undef __FUNCT__
1061 #define __FUNCT__ "MatCholeskyFactor_SeqSBAIJ"
1062 int MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,PetscReal f)
1063 {
1064   int ierr;
1065   Mat C;
1066 
1067   PetscFunctionBegin;
1068   ierr = MatCholeskyFactorSymbolic(A,perm,f,&C);CHKERRQ(ierr);
1069   ierr = MatCholeskyFactorNumeric(A,&C);CHKERRQ(ierr);
1070   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 
1075